YONI fungal ITS2: making phyloseq object, calculating diversities and annotating FUNGuild

STEP 1: making phyloseq object

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("readxl")
library(tidyverse)

1. import OTUs and taxa

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

otu <- read.table("ITS_OTUs_mod.txt", sep="\t", header=FALSE)
tax <- read.table("ITS_OTUs.UNITEv10_sh_99.wang.taxonomy", sep="\t", header=FALSE)

Chec do the otu names match

dif <- setdiff(tax$V1, otu$V1) 
dif
## character(0)

Good, same OTUs in tax and OTU tables

Lets tweek the table row names and columns

# modify otu table

colnames(otu)=otu[c(1),]
# erase the first row, as now it is doubled
otu=otu[-c(1),]

# make first column into rownames
rownames(otu) <- otu$`OTU ID`
otu <- otu[, -c(1)]
# let's make a copy of tax table
tax.orig <- tax

# change column names
colnames(tax)[1] <- "OTU"
colnames(tax)[2] <- "taxa"

# and modify the tax, where in the str_remove_all . means any single character
tax <- tax %>%
  mutate(taxa = str_remove_all(taxa, ".__")) %>%
  separate(taxa,
           into = c("kingdom", "phylum", "class", "order", "family", "genus", "species"),
           sep = ";")

Everything went well but I got a warning message, because there are an extra ; at the end of the line so the last column is not made. which is correct. Let’s check if we now have NAs, just to check everything is ok.

sum(is.na(tax$OTU))
## [1] 0
sum(is.na(tax$kingdom))
## [1] 0
sum(is.na(tax$phylum))
## [1] 0
sum(is.na(tax$class))
## [1] 0
sum(is.na(tax$order))
## [1] 0
sum(is.na(tax$family))
## [1] 0
sum(is.na(tax$genus))
## [1] 0
sum(is.na(tax$species))
## [1] 0

Everything is ok since no column NA’s

Let’s remove the parentheses and numbers

tax$kingdom <- sub("\\(.*", "", tax$kingdom)
tax$phylum <- sub("\\(.*", "", tax$phylum)
tax$class <- sub("\\(.*", "", tax$class)
tax$order <- sub("\\(.*", "", tax$order)
tax$family <- sub("\\(.*", "", tax$family)
tax$genus <- sub("\\(.*", "", tax$genus)
tax$species <- sub("\\(.*", "", tax$species)

Check unique values of the higher taxons

paste("unique kingdoms: ", unique(tax$kingdom))
## [1] "unique kingdoms:  Fungi"
paste("And the unique phyla are: ")
## [1] "And the unique phyla are: "
unique(tax$phylum)
##  [1] "Ascomycota"            "Basidiomycota"         "Fungi_unclassified"   
##  [4] "Rozellomycota"         "Basidiobolomycota"     "Chytridiomycota"      
##  [7] "Mortierellomycota"     "unclassified"          "Glomeromycota"        
## [10] "Mucoromycota"          "Zoopagomycota"         "Monoblepharomycota"   
## [13] "Olpidiomycota"         "Kickxellomycota"       "Entorrhizomycota"     
## [16] "Aphelidiomycota"       "Neocallimastigomycota"

2. import meta data

samples <- read.csv2('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile\\sample_data_updated_root_biomass.csv')
# change some column names
colnames(samples)[which(names(samples) == "production_type")] <- "sample_type"

2.1. change otu table sample names

The sample names in otu-table and sample-table do not match, because I have modified the after sequencing. However, I still have the original sample names as an ID column in sample-table so let’s use this to make the otu-table sample names to match.

x <- otu

x <- t(x)
x <- as.data.frame(x)
x <- rownames_to_column(x)

# pick the ID and sample names
y <- samples[, c(1,2)]
# add the sample names to x
x <- left_join(y, x, by = c("ID"="rowname"))
# remove ID
x <- x[, -c(1)]
# make sample names into rownames
rownames(x) <- x$sampleID
# remove the sampleID column
x <- x[, -c(1)]
x <- t(x)
otu <- x
rm(x)
rm(y)

# make into data frame
otu <- as.data.frame(otu)
# make sure numbers are numbers
otu <- otu %>% mutate_if(is.character, as.numeric)

3. make phyloseq object

# make OTUs into row names for tax-table and sample data
rownames(tax) <- tax$OTU
tax <- tax[, -c(1)]
rownames(samples) <- samples$sampleID
samples <- samples[, -c(1)]

# make all the elements, otu, tax and sample data, into phyloseq compatible format
samples <- sample_data(samples)
otu <- as.matrix(otu)
otu <- otu_table(otu, taxa_are_rows = TRUE)
tax <- tax_table(as.matrix(tax))

# finally combine all the elemnt to make a phyloseq object
ps <- phyloseq(otu, tax, samples)
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 70198 taxa and 142 samples ]
## sample_data() Sample Data:       [ 142 samples by 21 sample variables ]
## tax_table()   Taxonomy Table:    [ 70198 taxa by 7 taxonomic ranks ]

remove unclassified phyla and controls

Note: Unclassified phyla are called either:

“Fungi_unclassified” or “unclassified”

# let's first remove the two controls, which by the way had zero OTUs
ps <- subset_samples(ps, sampleID != "0ctrl-1")
ps <- subset_samples(ps, sampleID != "0ctrl-2")
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 70198 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 21 sample variables ]
## tax_table()   Taxonomy Table:    [ 70198 taxa by 7 taxonomic ranks ]
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps),
              MARGIN = ifelse(taxa_are_rows(ps), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps = prune_taxa((prev0 > 0), ps)
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 70198 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 21 sample variables ]
## tax_table()   Taxonomy Table:    [ 70198 taxa by 7 taxonomic ranks ]
rm(prev0)
rm(tax.orig)

#I will remove two: "Fungi_unclassified" and "unclassified"
ps <- phyloseq::subset_taxa(ps, phylum != "Fungi_unclassified" & phylum != "unclassified")
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 68186 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 21 sample variables ]
## tax_table()   Taxonomy Table:    [ 68186 taxa by 7 taxonomic ranks ]

4. save phyloseq with sng

ps_sng <- ps
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
save(ps_sng, file='ps_phyloseq_with_sng')

5. Remove singletons and save phyloseq

ps <- filter_taxa(ps, function (x) {sum(x > 0) > 1}, prune=TRUE)

setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
save(ps, file='ps_FINAL')

ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 21 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]

Without singletons, there are 20610 OTUs

Let’s get some other data from the ps

summarize_phyloseq(ps)
## [[1]]
## [1] "1] Min. number of reads = 44"
## 
## [[2]]
## [1] "2] Max. number of reads = 410851"
## 
## [[3]]
## [1] "3] Total number of reads = 11539503"
## 
## [[4]]
## [1] "4] Average number of reads = 82425.0214285714"
## 
## [[5]]
## [1] "5] Median number of reads = 79653.5"
## 
## [[6]]
## [1] "7] Sparsity = 0.928178415470992"
## 
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
## 
## [[8]]
## [1] "8] Number of singletons = 0"
## 
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n        (i.e. exactly one read detected across all samples)0"
## 
## [[10]]
## [1] "10] Number of sample variables are: 21"
## 
## [[11]]
##  [1] "sampleID"          "plot"              "sampling_position"
##  [4] "depth"             "depth_numerical"   "vegetation"       
##  [7] "sample_type"       "root_mgg"          "pH_H2O"           
## [10] "EC_uScm"           "C_g_per_kg"        "N_gkg"            
## [13] "TP_gkg"            "Alox_mmolkg"       "Feox_mmolkg"      
## [16] "oxides_mmolkg"     "PH2O_mgkg"         "Porg_mgkg"        
## [19] "DOC_mgkg"          "Pinorg_mgkg"       "C_per_N"

6. Check low readcounts

readcount(ps) %>% sort(decreasing = FALSE) %>% head()
##  CG9.1_40to70  NG2B3_30to40  NG2B2_40to70 CPO5.2_40to70  CG9.3_40to70 
##            44           526           811          7818          8588 
## OG10.1_40to80 
##          9882

There is only one sample with low read count (it is conventional treatment deep sample) whereas the others have quite ok (more than 500). However, I tested (not shown here) that keeping the low read count sample in the data does not change any of the analysis so it may still represent the fungal community(?)

7. IMPORTANT! Bug in the TAX table!!

I noticed that there is a mistake in the taxonomy, so that sometimes I have “unclassified” annotation at lets say genus level, although I have annotation at e.g. family level

In these cases I rather have at the genus level the annotation “family_unclassified”

tax <- as.data.frame(tax_table(ps))

I checked that there is no unclassified at phylum level

So let’s change first at class level.

tax$class <- ifelse(tax$class == "unclassified", paste(tax$phylum, sep = "_", "unclassified"), as.character(tax$class))   

And do same for all rest of the ranks, but I have to replace the “_unclassified_unclassified” with “_unclassified” afterwards

I know, there must be a more elegant way to do this:)

Change for order

tax$order <- ifelse(tax$order == "unclassified", paste(tax$class, sep = "_", "unclassified"), as.character(tax$order))   
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)

Change for family

tax$family <- ifelse(tax$family == "unclassified", paste(tax$order, sep = "_", "unclassified"), as.character(tax$family))   
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)

Change for genus

tax$genus <- ifelse(tax$genus == "unclassified", paste(tax$family, sep = "_", "unclassified"), as.character(tax$genus))   
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)

Change for species

tax$species <- ifelse(tax$species == "unclassified", paste(tax$genus, sep = "_", "unclassified"), as.character(tax$species))   
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)

8. Save

Save the new modified ps as the new “final version”

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

tax_table(ps) <- tax_table(as.matrix(tax))

save(ps, file='ps_FINAL')

ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 21 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]

STEP 2: Calculating diversities

library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 21 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)

Richness figures

1. Change metadata numerics

# change natural_grass to meadow
 meta <- data.frame(lapply(meta, function(x) {gsub("Natural_grass", "meadow", x)}))
 rownames(meta) <- meta$sampleID
 
 # Column indices to be converted numeric
i <- c(5, 8:21)
meta[, i] <- apply(meta[, i], 2, function(x) as.numeric(as.character(x)))

# lets chance the depth to soil layer mid-depth
meta$depth_numerical[meta$depth_numerical == 0] <- 5
meta$depth_numerical[meta$depth_numerical == 10] <- 15
meta$depth_numerical[meta$depth_numerical == 20] <- 25
meta$depth_numerical[meta$depth_numerical == 30] <- 35
meta$depth_numerical[meta$depth_numerical == 40] <- 60


sample_data(ps) <- sample_data(meta)                

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
save(ps, file='ps_FINAL')

2. Calculate diversities

2.1. First without singletons

# calculate all diversity measures
div <- microbiome::alpha(ps, index = "all")

# Assign the estimated diversity and richness to sample metadata
sample_data(ps)$observed <- div$observed
sample_data(ps)$chao1 <- div$chao1
sample_data(ps)$shannon <- div$diversity_shannon

meta <- meta(ps)

meta$sample_type <- as.factor(meta$sample_type)

meta$sample_type <- factor(meta$sample_type, levels = c("forest", "meadow", "organic", "conventional"))

sample_data(ps) <- sample_data(meta)                

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
save(ps, file='ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 24 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]

2.2 with singletons

I will use the one without singletons, but just for comparison, I also calculate with singletons

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
load('ps_phyloseq_with_sng')
ps_sng
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 68186 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 21 sample variables ]
## tax_table()   Taxonomy Table:    [ 68186 taxa by 7 taxonomic ranks ]
div <- microbiome::alpha(ps_sng, index = "all")

# Assign the estimated diversity to sample metadata
sample_data(ps)$observed_sng <- div$observed
sample_data(ps)$chao1_sng <- div$chao1
sample_data(ps)$shannon_sng <- div$diversity_shannon

meta <- meta(ps)
    
save(ps, file='ps_FINAL')

3. plot diversity

color palette

# create your own color palette for sample types
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c")

plot depthwise

# OTU richness
OTU_rich <- meta %>%
  dplyr::group_by(sample_type, depth_numerical) %>%
  dplyr::summarise(mean = mean(observed, na.rm = TRUE), se = (sd(observed, na.rm = TRUE)/sqrt(length((observed))))) %>%
  dplyr::ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=18),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="OTU richness") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

OTU_rich

# shannon
shannon <- meta %>%
  dplyr::group_by(sample_type, depth_numerical) %>%
  dplyr::summarise(mean = mean(shannon, na.rm = TRUE), se = (sd(shannon, na.rm = TRUE)/sqrt(length((shannon))))) %>%
  dplyr::ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=18),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Shannon") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

shannon

3.2. plot with sng

plot depthwise

# OTU richness
OTU_rich_sng <- meta %>%
  dplyr::group_by(sample_type, depth_numerical) %>%
  dplyr::summarise(mean = mean(observed_sng, na.rm = TRUE), se = (sd(observed_sng, na.rm = TRUE)/sqrt(length((observed_sng))))) %>%
  dplyr::ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=18),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="OTU richness") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

OTU_rich_sng

Keeping the singletons does not change the comparison of soil layers and treatments

# shannon
shannon_sng <- meta %>%
  dplyr::group_by(sample_type, depth_numerical) %>%
  dplyr::summarise(mean = mean(shannon_sng, na.rm = TRUE), se = (sd(shannon_sng, na.rm = TRUE)/sqrt(length((shannon_sng))))) %>%
  dplyr::ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=18),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Shannon") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

shannon


STEP 3: Annotating OTUs with FUNGuild

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("RColorBrewer") # nice color options
library(multcompView)
library(rcompanion)
library(car)
library(multcomp)
library(stringr)
library(ggrepel)
library(MicEco)
library(metagMisc)

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]

1. Load and save FUNGuild

This is how one would acquire the funguild database, but I will use the one I previously acquired

#FG <- parse_funguild()
#attr(FG, "DownloadDate")  # Check when the database was downloaded
#setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
#save FG
#write.csv2(FG, file = "FUNGuild_31_05_2024.csv", row.names = FALSE)

I have previously uploaded the FUNGuild database and will use that version here for continuity. I used a version downloaded: “Fri May 31 19:45:41 2024”

2. Annotate FUNGuild at different taxonomic levels

I need to annotate separately at different taxonomic level

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

FG <- read.csv2("FUNGuild_31_05_2024.csv")

# what different levels there are:
unique(FG$taxonomicLevel)
## [1] "Genus"      "Species"    "Variety"    "Family"     "Order"     
## [6] "Phylum"     "Form"       "Subspecies"

I will annotate with Species, Genus, Family, Order, Phylum level

2.1. species

fg <- FG[FG$taxonomicLevel == "Species", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# fg_sp doesn't have underscores, so lets add them
fg$taxon <- sub(" ", "_", fg$taxon)
# merge tables
colnames(fg)[1] <- "species"
FG_tax_table <- merge(tax_table, fg, by = "species", all.x = TRUE)


# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]

# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_sp"
colnames(FG_tax_table)[2] <- "guild_sp"

# save with new name
FUNGuild_sp <- FG_tax_table

2.2. Genus

fg <- FG[FG$taxonomicLevel == "Genus", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "genus"
FG_tax_table <- merge(tax_table, fg, by = "genus", all.x = TRUE)


# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
# and OTU as row names
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]

# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")


# change column names
colnames(FG_tax_table)[1] <- "trophicMode_gen"
colnames(FG_tax_table)[2] <- "guild_gen"

# save with new name
FUNGuild_gen <- FG_tax_table

2.3. Family

fg <- FG[FG$taxonomicLevel == "Family", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "family"
FG_tax_table <- merge(tax_table, fg, by = "family", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]

# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_fam"
colnames(FG_tax_table)[2] <- "guild_fam"

# save with new name
FUNGuild_fam <- FG_tax_table

2.4. Order

fg <- FG[FG$taxonomicLevel == "Order", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "order"
FG_tax_table <- merge(tax_table, fg, by = "order", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]

# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_ord"
colnames(FG_tax_table)[2] <- "guild_ord"

# save with new name
FUNGuild_ord <- FG_tax_table

2.5. Phylum

fg <- FG[FG$taxonomicLevel == "Phylum", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "phylum"
FG_tax_table <- merge(tax_table, fg, by = "phylum", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]

# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_phy"
colnames(FG_tax_table)[2] <- "guild_phy"

# save with new name
FUNGuild_phy <- FG_tax_table

2.6. combine all annotations

species Genus Family Order Phylum

x <- left_join(rownames_to_column(FUNGuild_sp), rownames_to_column(FUNGuild_gen), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_fam), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_ord), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_phy), by = "rowname")
# get the value from another column if NA: now from genus 
y <- x %>%
  mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_gen))

# get the value from another column if NA: now from genus
y <- y %>%
  mutate(guild_sp = coalesce(guild_sp,guild_gen))

# same for family
# get the value from another column if NA
y <- y %>%
  mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_fam))

# get the value from another column if NA
y <- y %>%
  mutate(guild_sp = coalesce(guild_sp,guild_fam))

# same for order
# get the value from another column if NA
y <- y %>%
  mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_ord))

# get the value from another column if NA
y <- y %>%
  mutate(guild_sp = coalesce(guild_sp,guild_ord))

# same for phylum
# get the value from another column if NA
y <- y %>%
  mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_phy))

# get the value from another column if NA
y <- y %>%
  mutate(guild_sp = coalesce(guild_sp,guild_phy))

# then rename the sp columns
colnames(y)[2] <- "trophicMode"
colnames(y)[3] <- "guild"


# remove the rest of the columns
y <- y[, -c(4:11)]

# rownames
y2 <- y[,-1]
rownames(y2) <- y[,1]
# how many of different trophic modes and NAs?
table(y2$trophicMode, useNA = "ifany")
## 
##                         Pathotroph   Pathotroph-Pathotroph-Saprotroph 
##                                 20                                  7 
##  Pathotroph-Saprotroph-Symbiotroph                         Saprotroph 
##                                  7                                 37 
##                        Symbiotroph                         Pathotroph 
##                                148                               1480 
##              Pathotroph-Saprotroph  Pathotroph-Saprotroph-Symbiotroph 
##                               1973                                977 
##             Pathotroph-Symbiotroph                         Saprotroph 
##                                 98                               4805 
##             Saprotroph-Symbiotroph                        Symbiotroph 
##                               1179                                636 
##                               <NA> 
##                               9243
9243/20610
## [1] 0.4484716

So approximately 44.8% are NAs

3. Make initial ps_FG

y3 <- left_join(rownames_to_column(y2), rownames_to_column(tax_table), by = "rowname")
row.names(y3) <- y3$rowname
y3 <- y3[, -1]
ps_FG <- phyloseq(otu_table(ps), tax_table(as.matrix(y3)), sample_data(ps))
ps_FG
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 10 taxonomic ranks ]
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

save(ps_FG, file = 'ps_FG_with_NAs')

20610 taxa; contains all the NAs

4. Remove empty spaces etc.

and remove also “|”

FG_tax_table <- as.data.frame(tax_table(ps_FG)) 
FG_tax_table$trophicMode <- gsub(" ", "", FG_tax_table$trophicMode, fixed = TRUE)
FG_tax_table$guild <- gsub("|", "", FG_tax_table$guild, fixed = TRUE)

5. Define AMFs, Ectomycorrhizal and Potential Plant pathogens

I will add a column “FUNGuild” where I curate some of the symbiotroph and pathotroph fungi according to my research interests. I am especially interested of AMF as they are important mycorrhiza in arable soils, but I also want to separate Ectomycorrhiza as they are important in forest. This leaves one more relatively big symbiotrophic guild, endophytes, which I will also specify. So the following curation will be done for FUNGuild column:

FG_tax_table <- FG_tax_table %>%  
   mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
FG_tax_table <- FG_tax_table %>%
  mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" |  FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" |  FG != "Arbuscular Mycorrhizal" |  FG != "Endophyte" | FG != "Plant Pathogen" ~ FG_tax_table$trophicMode))


# remove the FG column

FG_tax_table <- FG_tax_table[, c(1:10, 12)]

Check the different written forms, if I have empty spaces?

unique(FG_tax_table$trophicMode)
## [1] NA                                  "Saprotroph-Symbiotroph"           
## [3] "Symbiotroph"                       "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph"             "Pathotroph"                       
## [7] "Saprotroph"                        "Pathotroph-Symbiotroph"           
## [9] "Pathotroph-Pathotroph-Saprotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
##  [1] NA                                  "Saprotroph-Symbiotroph"           
##  [3] "Arbuscular Mycorrhizal"            "Pathotroph-Saprotroph-Symbiotroph"
##  [5] "Pathotroph-Saprotroph"             "Pathotroph"                       
##  [7] "Saprotroph"                        "Endophyte"                        
##  [9] "Ectomycorrhizal"                   "Pathotroph-Symbiotroph"           
## [11] "Plant Pathogen"                    "Symbiotroph"                      
## [13] "Pathotroph-Pathotroph-Saprotroph"

There is a “bug” in the FUNGuild data, so that in addition to Pathotroph-Saprotroph there is the “wrong” one Pathotroph-Pathotroph-Saprotroph etc. I will correct these.

Also I will name Symbiotroh into Other Symbiotroph as I have extracted the AMF, EcM etc. from the symbiotroph trophic mode

# change some names
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"

FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"


# reorder
FG_tax_table <- FG_tax_table[, c(1:2, 11, 3:10)]

Check again

unique(FG_tax_table$trophicMode)
## [1] NA                                  "Saprotroph-Symbiotroph"           
## [3] "Symbiotroph"                       "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph"             "Pathotroph"                       
## [7] "Saprotroph"                        "Pathotroph-Symbiotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
##  [1] NA                                  "Saprotroph-Symbiotroph"           
##  [3] "Arbuscular Mycorrhizal"            "Pathotroph-Saprotroph-Symbiotroph"
##  [5] "Pathotroph-Saprotroph"             "Other Pathotroph"                 
##  [7] "Saprotroph"                        "Endophyte"                        
##  [9] "Ectomycorrhizal"                   "Pathotroph-Symbiotroph"           
## [11] "Plant Pathogen"                    "Other Symbiotroph"

6. Save final ps_FG and check NA proportions

ps_FG <- phyloseq(otu_table(ps), tax_table(as.matrix(FG_tax_table)), sample_data(ps))
ps_FG
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

save(ps_FG, file = 'ps_FG_with_NAs')

6.1 Check proportions of NAs

# lets calculate how big percentage of sequences where assigned (are not unknown)

ps_FG_RA <- transform(ps_FG, "compositional")

FG_RA_TmG <- aggregate_rare(ps_FG_RA, level = 'FUNGuild', detection = 0/100, prevalence = 0/100)
FG_RA_TmG
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 12 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 12 taxa by 2 taxonomic ranks ]
glom <- tax_glom(FG_RA_TmG, taxrank = 'FUNGuild')
percentages <- psmelt(glom)
df <- percentages %>%
  group_by(OTU) %>%
  summarise_at(vars(Abundance), list(name = mean))

df
## # A tibble: 12 × 2
##    OTU                                   name
##    <chr>                                <dbl>
##  1 Arbuscular Mycorrhizal            0.0240  
##  2 Ectomycorrhizal                   0.0305  
##  3 Endophyte                         0.0206  
##  4 Other Pathotroph                  0.0323  
##  5 Other Symbiotroph                 0.000308
##  6 Pathotroph-Saprotroph             0.0850  
##  7 Pathotroph-Saprotroph-Symbiotroph 0.0324  
##  8 Pathotroph-Symbiotroph            0.00580 
##  9 Plant Pathogen                    0.0225  
## 10 Saprotroph                        0.292   
## 11 Saprotroph-Symbiotroph            0.150   
## 12 Unknown                           0.305

Note, 30.5% of reads were not assigned (smaller percentage than for the OTUs)

Check also the percentage of NAs within the four sample types: forest, meadow, organic and conventional. Do they differ?

FG_RA_Tm <- aggregate_rare(ps_FG_RA, level = 'trophicMode', detection = 0/100, prevalence = 0/100)

glom <- tax_glom(FG_RA_Tm, taxrank = 'trophicMode')
percentages <- psmelt(glom)
df <- percentages %>%
  group_by(sample_type, OTU) %>%
  summarise_at(vars(Abundance), list(name = mean))
df
## # A tibble: 32 × 3
## # Groups:   sample_type [4]
##    sample_type OTU                                   name
##    <fct>       <chr>                                <dbl>
##  1 forest      Pathotroph                        0.0104  
##  2 forest      Pathotroph-Saprotroph             0.0841  
##  3 forest      Pathotroph-Saprotroph-Symbiotroph 0.0315  
##  4 forest      Pathotroph-Symbiotroph            0.000181
##  5 forest      Saprotroph                        0.154   
##  6 forest      Saprotroph-Symbiotroph            0.176   
##  7 forest      Symbiotroph                       0.261   
##  8 forest      Unknown                           0.283   
##  9 meadow      Pathotroph                        0.0266  
## 10 meadow      Pathotroph-Saprotroph             0.0568  
## # ℹ 22 more rows

management NAs Forest 28.3 meadow 23.8 organic 31.8 conventional 35.9


RESULTS STEP 1: VENN diagrams and number of OTUs

Here, I will check the number of reads and OTUs and construct venn-diagrams for sample types (or management type) and soil layers.

library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library("MicEco")

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA <- microbiome::transform(ps, "compositional")

meta <- meta(ps)

1. How many reads and OTUs?

summarize_phyloseq(ps)
## [[1]]
## [1] "1] Min. number of reads = 44"
## 
## [[2]]
## [1] "2] Max. number of reads = 410851"
## 
## [[3]]
## [1] "3] Total number of reads = 11539503"
## 
## [[4]]
## [1] "4] Average number of reads = 82425.0214285714"
## 
## [[5]]
## [1] "5] Median number of reads = 79653.5"
## 
## [[6]]
## [1] "7] Sparsity = 0.928178415470992"
## 
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
## 
## [[8]]
## [1] "8] Number of singletons = 0"
## 
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n        (i.e. exactly one read detected across all samples)0"
## 
## [[10]]
## [1] "10] Number of sample variables are: 27"
## 
## [[11]]
##  [1] "sampleID"          "plot"              "sampling_position"
##  [4] "depth"             "depth_numerical"   "vegetation"       
##  [7] "sample_type"       "root_mgg"          "pH_H2O"           
## [10] "EC_uScm"           "C_g_per_kg"        "N_gkg"            
## [13] "TP_gkg"            "Alox_mmolkg"       "Feox_mmolkg"      
## [16] "oxides_mmolkg"     "PH2O_mgkg"         "Porg_mgkg"        
## [19] "DOC_mgkg"          "Pinorg_mgkg"       "C_per_N"          
## [22] "observed"          "chao1"             "shannon"          
## [25] "observed_sng"      "chao1_sng"         "shannon_sng"

We obtained 11662127 fungal reads which clustered into 31714 OTUs in the 140 samples.

2. VENN

soil management

MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c")

# Note, relative is false, because I am using an alredy relative abundance transformed phyloseq object.
venn_no_prev <- ps_venn(
  ps_RA,
  "sample_type",
  fraction = 0,
  weight = FALSE,
  relative = FALSE,
  plot = TRUE,
  quantities = list(cex=0.7),
  fill = MyPalette
)
venn_no_prev

How many OTUs are shared by all sample types? I will pick the shared by all from the figure and divide it with the total OTU number to get the percentage

shared_by_all <- venn_no_prev[["data"]][["original.values"]][["forest&meadow&organic&conventional"]]
total_OTUs <- 20610
  
shared_by_all/total_OTUs
## [1] 0.1246967

12.5% of OTUs were shared by all management

soil layer

meta$new_depth <- meta$depth

meta$new_depth <- gsub("...", "-", meta$new_depth, fixed = TRUE)

meta$new_depth[meta$new_depth=="40-"] <- "40-80"

meta$new_depth[meta$new_depth=="0-10"] <- "0-10 cm"
meta$new_depth[meta$new_depth=="10-20"] <- "10-20 cm"
meta$new_depth[meta$new_depth=="20-30"] <- "20-30 cm"
meta$new_depth[meta$new_depth=="30-40"] <- "30-40 cm"
meta$new_depth[meta$new_depth=="40-80"] <- "40-80 cm"

sample_data(ps) <- sample_data(meta)

ps_RA <- transform(ps, "compositional")
  
# create your own color palette for sample types
MyPalette <- list(c('0-10 cm' = "#387212", '10-20 cm' = "#ADC476", '20-30 cm' = "#D8D2BA",'30-40 cm' = "#907852", '40-80 cm' = "#6A4C3A"))

venn_DEPTH <- ps_venn(
  ps_RA,
  "new_depth",
  fraction = 0,
  weight = FALSE,
  relative = TRUE,
  quantities = list(cex=0.7),
  plot = TRUE,
  fill = MyPalette[[1]]
)
venn_DEPTH

Again, how many OTUs are shared by all layers?

shared_by_all <- venn_DEPTH[["data"]][["original.values"]][["0-10 cm&10-20 cm&20-30 cm&30-40 cm&40-80 cm"]]
total_OTUs <- 20610
  
shared_by_all/total_OTUs
## [1] 0.04885978

4.9% of OTUs were shared by all layers

OTU allocation to layers

How much of the OTUs in the dataset were found in the first, first two or first three soil layers?

first three soil layers

ps_x <- subset_samples(ps, depth!="30...40" & depth!="40...")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
              MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20130 taxa and 84 samples ]
## sample_data() Sample Data:       [ 84 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20130 taxa by 7 taxonomic ranks ]

In the first three layers: 20130 OTUs

OTUs <- 20130
total_OTUs <- 20610
  
OTUs/total_OTUs
## [1] 0.9767103

97.7% of all OTUs were found in the first 3 soil layers

first two soil layers

ps_x <- subset_samples(ps, depth!="20...30" & depth!="30...40" & depth!="40...")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
              MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 18392 taxa and 56 samples ]
## sample_data() Sample Data:       [ 56 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 18392 taxa by 7 taxonomic ranks ]

In the first three layers: 18392 OTUs

OTUs <- 18392
total_OTUs <- 20610
  
OTUs/total_OTUs
## [1] 0.8923823

89.2% of all OTUs were found in the first 2 soil layers

first soil layer

ps_x <- subset_samples(ps, depth=="0...10")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
              MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 14737 taxa and 28 samples ]
## sample_data() Sample Data:       [ 28 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 14737 taxa by 7 taxonomic ranks ]
OTUs <- 14737
total_OTUs <- 20610
  
OTUs/total_OTUs
## [1] 0.7150412

71.5% of all OTUs were found in the first soil layer

combine the figures

library(ggpubr)

fig <- ggarrange(venn_no_prev, venn_DEPTH,
                     ncol = 2, nrow = 1)

fig


RESULTS STEP 2: PCoA with OTUs and soil properties

library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library(goeveg)
library(metagMisc)

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)

1. Root biomass log10 transformatio

hist(meta$root_mgg)

#perform Shapiro-Wilk Test
shapiro.test(meta$root_mgg)
## 
##  Shapiro-Wilk normality test
## 
## data:  meta$root_mgg
## W = 0.35847, p-value < 2.2e-16
#log transformation

meta <- meta %>%
  mutate(log_root = log10(root_mgg))

hist(meta$log_root)

#perform Shapiro-Wilk Test
shapiro.test(meta$log_root)
## 
##  Shapiro-Wilk normality test
## 
## data:  meta$log_root
## W = 0.98109, p-value = 0.04971
#now is normally distributed since p value is more than 0.05

save to ps

sample_data(ps) <- sample_data(meta)
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')
save(ps, file='ps_FINAL')

2. Bray distance and PCoA

ps_RA <- microbiome::transform(ps, "compositional")

To visualize beta diversity, I will do a PCoA which is metric instead of e.g. non-metric NMDS

I will be following somewhat this tutorial:

Joey Bernhardt

OTU = as(otu_table(ps_RA), "matrix")
# transpose if necessary
if(taxa_are_rows(ps_RA)){OTU <- t(OTU)}
# Coerce to data.frame
OTU = as.data.frame(OTU)
OTU <- as.matrix(OTU)

bray_dist <- vegan::vegdist(OTU, method="bray")
str(bray_dist)
##  'dist' Named num [1:9730] 0.367 0.694 0.967 0.723 0.47 ...
##  - attr(*, "maxdist")= num 1
##  - attr(*, "Size")= int 140
##  - attr(*, "Labels")= chr [1:140] "CG9.1_0to10" "CG9.1_10to20" "CG9.1_20to30" "CG9.1_30to40" ...
##  - attr(*, "Diag")= logi FALSE
##  - attr(*, "Upper")= logi FALSE
##  - attr(*, "method")= chr "bray"
##  - attr(*, "call")= language vegan::vegdist(x = OTU, method = "bray")
# use k = 3 so we'll get calculations for three axes
pcoa <- cmdscale(bray_dist, eig=TRUE, k = 3)
ordiplot(scores(pcoa),display="sites", type="points")

Ordination with axes 1 and 2

Let’s first make PCoA ordination with axes 1 and 2, and later for 1 and 3.

Env. variables

# Post-Hoc Projections of environmental variables
# envfit in vegan projects points onto vectors that have maximum correlation with corresponding environmental variables
pcoa.env12 <- envfit(pcoa, meta[ ,c("pH_H2O", "C_g_per_kg", "N_gkg", "TP_gkg", "sample_type", "depth_numerical", "DOC_mgkg", "Pinorg_mgkg", "Porg_mgkg", "log_root", "C_per_N", "Feox_mmolkg", "Alox_mmolkg")], na.rm = TRUE, choices=c(1:2), permutations = 999)

# main effects
pcoa.env12
## 
## ***VECTORS
## 
##                     Dim1     Dim2     r2 Pr(>r)    
## pH_H2O           0.89803 -0.43993 0.4996  0.001 ***
## C_g_per_kg      -0.92647  0.37636 0.5577  0.001 ***
## N_gkg           -0.95072  0.31005 0.5692  0.001 ***
## TP_gkg          -0.93787 -0.34699 0.5026  0.001 ***
## depth_numerical  0.98087  0.19465 0.5300  0.001 ***
## DOC_mgkg        -0.76599  0.64286 0.4138  0.001 ***
## Pinorg_mgkg      0.17893 -0.98386 0.0740  0.010 ** 
## Porg_mgkg       -0.99894  0.04605 0.5753  0.001 ***
## log_root        -0.80130  0.59827 0.4884  0.001 ***
## C_per_N         -0.97551  0.21996 0.4224  0.001 ***
## Feox_mmolkg     -0.84349  0.53715 0.4468  0.001 ***
## Alox_mmolkg     -0.67701  0.73598 0.3176  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## ***FACTORS:
## 
## Centroids:
##                            Dim1    Dim2
## sample_typeforest        0.0679  0.1867
## sample_typemeadow       -0.0229  0.1780
## sample_typeorganic      -0.0098 -0.1293
## sample_typeconventional  0.0006 -0.1050
## 
## Goodness of fit:
##                 r2 Pr(>r)    
## sample_type 0.2013  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## 1 observation deleted due to missingness
ef12.adj <- pcoa.env12
pvals.adj <- p.adjust (pcoa.env12$vectors$pvals, method = 'bonferroni')
ef12.adj$vectors$pvals <- pvals.adj
ef12.adj
## 
## ***VECTORS
## 
##                     Dim1     Dim2     r2 Pr(>r)  
## pH_H2O           0.89803 -0.43993 0.4996  0.012 *
## C_g_per_kg      -0.92647  0.37636 0.5577  0.012 *
## N_gkg           -0.95072  0.31005 0.5692  0.012 *
## TP_gkg          -0.93787 -0.34699 0.5026  0.012 *
## depth_numerical  0.98087  0.19465 0.5300  0.012 *
## DOC_mgkg        -0.76599  0.64286 0.4138  0.012 *
## Pinorg_mgkg      0.17893 -0.98386 0.0740  0.120  
## Porg_mgkg       -0.99894  0.04605 0.5753  0.012 *
## log_root        -0.80130  0.59827 0.4884  0.012 *
## C_per_N         -0.97551  0.21996 0.4224  0.012 *
## Feox_mmolkg     -0.84349  0.53715 0.4468  0.012 *
## Alox_mmolkg     -0.67701  0.73598 0.3176  0.012 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## ***FACTORS:
## 
## Centroids:
##                            Dim1    Dim2
## sample_typeforest        0.0679  0.1867
## sample_typemeadow       -0.0229  0.1780
## sample_typeorganic      -0.0098 -0.1293
## sample_typeconventional  0.0006 -0.1050
## 
## Goodness of fit:
##                 r2 Pr(>r)    
## sample_type 0.2013  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## 1 observation deleted due to missingness

To plot (classical) mds (which is equivalent to PCoA) with ggplot a new datasheet needs to be created which contains the x,y points for each site. You can do this by calling the scores of you mds.

site.scrs <- as.data.frame(scores(pcoa, display = "sites")) #save pcoa results into dataframe
site.scrs <- cbind(site.scrs, soil_type = meta$sample_type) #add grouping variable "soil_type" to dataframe
site.scrs <- cbind(site.scrs, depth = meta$depth) #add grouping variable of depth

head(site.scrs)
##                     Dim1        Dim2         Dim3    soil_type   depth
## CG9.1_0to10  -0.32486951 -0.16939677  0.057117223 conventional  0...10
## CG9.1_10to20 -0.35683666 -0.18451022  0.010039481 conventional 10...20
## CG9.1_20to30  0.10483906 -0.26374886 -0.239911662 conventional 20...30
## CG9.1_30to40  0.49569905 -0.18888465 -0.132639765 conventional 30...40
## CG9.1_40to70  0.03411974 -0.01592131 -0.009914955 conventional   40...
## CG9.2_0to10  -0.24690800 -0.12969297  0.145831464 conventional  0...10

I will be following somewhat this tutorial for fitting the environmental variables etc:

jkzorz github

To show environmental extrinsic variables another datasheet needs to be created

Citation from the jkzorz github

“Extracting the required information from the envfit result is a bit more complicated. The envfit output contains information on the length of the segments for each variable. The segments are scaled to the r2 value, so that the environmental variables with a longer segment are more strongly correlated with the data than those with a shorter segment. You can extract this information with scores. Then these lengths are further scaled to fit the plot. This is done with a multiplier that is analysis specific, and can be accessed using the command ordiArrowMul(en). Below I multiply the scores by this multiplier to keep the coordinates in the correct proportion.”

Because my data contained continuous and categorical environmental variables, Im extracting the information from both separately using the vectors and factors options respectively.

categorial and continuous variables

# first categorial variables
env.scores_cat12 <- as.data.frame(scores(pcoa.env12, display = "factors"))*0.25 # multiply the envfit scores to keep them in the frame of the ordination
env.scores_cat12 <- cbind(env.scores_cat12, env.variables = rownames(env.scores_cat12)) #and then gives them their names

env.scores_cat12 <- cbind(env.scores_cat12, pval = pcoa.env12$factors$pvals) # add pvalues to dataframe
sig.env.scores_cat12 <- subset(env.scores_cat12, pval<=0.05) #subset data to show variables significant at 0.05
sig.env.scores_cat12
##                                  Dim1        Dim2           env.variables  pval
## sample_typeforest        0.0169631533  0.04667273       sample_typeforest 0.001
## sample_typemeadow       -0.0057255852  0.04448833       sample_typemeadow 0.001
## sample_typeorganic      -0.0024526659 -0.03233332      sample_typeorganic 0.001
## sample_typeconventional  0.0001453261 -0.02625329 sample_typeconventional 0.001
# all were significant


# then continous variables
env.scores_cont12 <- as.data.frame(scores(pcoa.env12, display = "vectors"))*0.25 # multiply the envfit scores to keep them in the frame of the ordination
env.scores_cont12 <- cbind(env.scores_cont12, env.variables = rownames(env.scores_cont12)) #and then gives them their names

env.scores_cont12 <- cbind(env.scores_cont12, pval = pcoa.env12$vectors$pvals) # add pvalues to dataframe
sig.env.scores_cont12 <- subset(env.scores_cont12, pval<=0.05) #subset data to show variables significant at 0.05
sig.env.scores_cont12
##                        Dim1         Dim2   env.variables  pval
## pH_H2O           0.15868133 -0.077735176          pH_H2O 0.001
## C_g_per_kg      -0.17297071  0.070265684      C_g_per_kg 0.001
## N_gkg           -0.17931771  0.058479126           N_gkg 0.001
## TP_gkg          -0.16622910 -0.061501047          TP_gkg 0.001
## depth_numerical  0.17852191  0.035426555 depth_numerical 0.001
## DOC_mgkg        -0.12318024  0.103379224        DOC_mgkg 0.001
## Pinorg_mgkg      0.01216617 -0.066897767     Pinorg_mgkg 0.010
## Porg_mgkg       -0.18942328  0.008732727       Porg_mgkg 0.001
## log_root        -0.14000008  0.104527314        log_root 0.001
## C_per_N         -0.15851066  0.035740927         C_per_N 0.001
## Feox_mmolkg     -0.14095001  0.089759046     Feox_mmolkg 0.001
## Alox_mmolkg     -0.09537901  0.103687400     Alox_mmolkg 0.001
# all were significant

species scores

A new dataset containing species data also needs to be made to look at species vectors.

# wascores computes Weighted Averages scores of species for ordination configuration or for environmental variables.
species.scores12 <- wascores(pcoa$points[,1:2], OTU)

species.scores13 <- wascores(pcoa$points[,1:3], OTU)

select OTUs: ordiselect

#  Ordiselect gives me more control for the significant OTUs to display
# I'm using 0.1 % of the most abundant and 100 % of the best fitting OTUs
# NOTE! the higher the ablim maybe more of the low diversity sample' OTUs shown..?

# The species scores of the 0.1% most abundant and 100% of the best fitting OTUs

ordis12 <- ordiselect(OTU, species.scores12, ablim = 0.001, fitlim = 1, choices = c(1,2), method = "axes", env = pcoa.env12)
## [1] "21 species selected (0.1% of total number of species)."
## [1] "All species selected which belong to the 0.1% most abundant species."
ordis12.species.scores <- species.scores12[ordis12, ]
ordis12.species.scores <- cbind(ordis12.species.scores, Species = rownames(ordis12.species.scores)) #add species names to dataframe

change into sp names

# OTUs into species names

OTU.sp <- as.data.frame(tax_table(ps_RA))
OTU.sp$OTU <- rownames(OTU.sp)
# remove other taxa
OTU.sp <- OTU.sp[ , -(1:6)]


try <- merge(data.frame(ordis12.species.scores), data.frame(OTU.sp), 
             by = 0,  all = FALSE)
#add row names
samp2 <- try[,-1]
rownames(samp2) <- try[,1]
try <- samp2
rm(samp2)
# remove the "Species"
try <- try[ , -(3)]
# remove "try"
ordis12.species.scores <- try
rm(try)

head(ordis12.species.scores)
##                          V1                 V2                        species
## OTU12776  0.126946387694976 -0.121543372900751             Clonostachys_rosea
## OTU139    0.200755588616414  0.046445493642642     Entomortierella_parvispora
## OTU13985 -0.237582025036352 0.0288145443505883            Saitozyma_podzolica
## OTU19296 -0.295916773006835 -0.149458330721953      Cladorrhinum_unclassified
## OTU20886  -0.25280082681134 -0.121756560222692 Paraphaeosphaeria_unclassified
## OTU23599  -0.22805912031261  0.169675633910535       Paraphaeosphaeria_viciae
##               OTU
## OTU12776 OTU12776
## OTU139     OTU139
## OTU13985 OTU13985
## OTU19296 OTU19296
## OTU20886 OTU20886
## OTU23599 OTU23599
# make values numeric
ordis12.species.scores$V1 <- as.numeric(ordis12.species.scores$V1)
ordis12.species.scores$V2 <- as.numeric(ordis12.species.scores$V2)

Now we have the relevant information for plotting the ordination in ggplot

MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c")

site.scrs$new_depth <- site.scrs$depth

site.scrs$new_depth <- gsub("...", "-", site.scrs$new_depth, fixed = TRUE)

site.scrs$new_depth[site.scrs$new_depth=="40-"] <- "40-80"

site.scrs$new_depth[site.scrs$new_depth=="0-10"] <- "0-10 cm"
site.scrs$new_depth[site.scrs$new_depth=="10-20"] <- "10-20 cm"
site.scrs$new_depth[site.scrs$new_depth=="20-30"] <- "20-30 cm"
site.scrs$new_depth[site.scrs$new_depth=="30-40"] <- "30-40 cm"
site.scrs$new_depth[site.scrs$new_depth=="40-80"] <- "40-80 cm"

get axis %

#First lets run the PCoA with phyloseq to get the axis percentages (plot_ordination gives axis percentages)

GP.ord <- ordinate(ps_RA, "PCoA", "bray")
p2 = plot_ordination(ps_RA, GP.ord, type="samples", color="sample_type", shape="depth")
p2

Remember to change the axis percentages accordingly below!!!

pcoa.plot <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim2, colour = factor(site.scrs$soil_type), shape = factor(site.scrs$new_depth)), size = 6, alpha = 0.6, stroke = 1.5) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.text = element_text(size = 12), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette) + labs(y = "PC2 (8.0%)", x = "PC1 (20.0%)")


pcoa.plot

Add OTUs to the PCoA

First, modify the species names

unique(ordis12.species.scores$species)
##  [1] "Clonostachys_rosea"             "Entomortierella_parvispora"    
##  [3] "Saitozyma_podzolica"            "Cladorrhinum_unclassified"     
##  [5] "Paraphaeosphaeria_unclassified" "Paraphaeosphaeria_viciae"      
##  [7] "Solicoccozyma_terricola"        "Pseudeurotium_unclassified"    
##  [9] "Pseudeurotium_hygrophilum"      "Clavulina_cinerea"             
## [11] "Fusarium_asiaticum"             "Pseudogymnoascus_unclassified" 
## [13] "Pleotrichocladium_opacum"       "Leotiomycetes_unclassified"    
## [15] "Rhexocercosporidium_panacis"    "Helotiales_unclassified"       
## [17] "Pseudogymnoascus_roseus"        "Solicoccozyma_terrea"          
## [19] "Mortierella_antarctica"         "Glutinoglossum_heptaseptatum"

I want to remove the “unclassified” from the end

ordis12.species.scores$species <- gsub("_unclassified","",as.character(ordis12.species.scores$species))
pcoa.plot_OTU <- pcoa.plot + ggrepel::geom_text_repel(data = ordis12.species.scores, aes(x=V1,y=V2,label=species), alpha=0.7, cex = 3.5, direction = "both", segment.size = 0.2, max.overlaps = Inf) + theme(legend.position = c(0.86, 0.8), legend.text = element_text(size = 12)) 

pcoa.plot_OTU

Add env. variables to the PCoA

First, simplify the names

sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "pH_H2O"] <- "pH"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "C_g_per_kg"] <- "C"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "N_gkg"] <- "N"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "TP_gkg"] <- "P-tot"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "depth_numerical"] <- "depth"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "DOC_mgkg"] <- "DOC"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Pinorg_mgkg"] <- "P-inorg"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Porg_mgkg"] <- "P-org"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "log_root"] <- "log root"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "C_per_N"] <- "C/N"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Feox_mmolkg"] <- "Fe-ox"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Alox_mmolkg"] <- "Al-ox"

…then plot

p1 <- pcoa.plot_OTU +
  geom_segment(aes(x = 0, y = 0, xend = Dim1, yend = Dim2), data = sig.env.scores_cont12, size =1, alpha = 0.5, colour = "grey30") + ggrepel::geom_text_repel(data = sig.env.scores_cont12, aes(x = Dim1, y = Dim2), colour = "blue", fontface = "bold", label = sig.env.scores_cont12$env.variables, segment.size = 0.2, box.padding = unit(0.1, "lines"), point.padding = (0.1), force = 1, max.time = 30, nudge_y = 0.00, nudge_x = 0.00) 

p1

This was saved with width 1000 and height 700

PCoA ordination for axes 1 and 3

pcoa.env13 <- envfit(pcoa, meta[ ,c("pH_H2O", "C_g_per_kg", "N_gkg", "TP_gkg", "sample_type", "depth_numerical", "DOC_mgkg", "Pinorg_mgkg", "Porg_mgkg", "log_root", "C_per_N", "Feox_mmolkg", "Alox_mmolkg")], na.rm = TRUE, choices=c(1:3), permutations = 999)

pcoa.env13
## 
## ***VECTORS
## 
##                     Dim1     Dim2     Dim3     r2 Pr(>r)    
## pH_H2O           0.71582 -0.34967  0.60442 0.5947  0.001 ***
## C_g_per_kg      -0.70297  0.28453 -0.65182 0.6894  0.001 ***
## N_gkg           -0.74553  0.24218 -0.62092 0.6801  0.001 ***
## TP_gkg          -0.91468 -0.33863 -0.22064 0.5108  0.001 ***
## depth_numerical  0.82942  0.16519  0.53364 0.5935  0.001 ***
## DOC_mgkg        -0.57514  0.48141 -0.66141 0.5386  0.001 ***
## Pinorg_mgkg      0.12414 -0.67859  0.72395 0.1308  0.002 ** 
## Porg_mgkg       -0.84201  0.03811 -0.53812 0.6442  0.001 ***
## log_root        -0.70696  0.52696 -0.47172 0.5404  0.001 ***
## C_per_N         -0.78189  0.17543 -0.59823 0.4935  0.001 ***
## Feox_mmolkg     -0.67315  0.42760 -0.60335 0.5371  0.001 ***
## Alox_mmolkg     -0.40124  0.43448 -0.80637 0.5726  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## ***FACTORS:
## 
## Centroids:
##                            Dim1    Dim2    Dim3
## sample_typeforest        0.0679  0.1867  0.1190
## sample_typemeadow       -0.0229  0.1780 -0.0916
## sample_typeorganic      -0.0098 -0.1293  0.0392
## sample_typeconventional  0.0006 -0.1050  0.0041
## 
## Goodness of fit:
##                 r2 Pr(>r)    
## sample_type 0.1997  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## 1 observation deleted due to missingness
ef13.adj <- pcoa.env13
pvals.adj <- p.adjust (pcoa.env13$vectors$pvals, method = 'bonferroni')
ef13.adj$vectors$pvals <- pvals.adj
ef13.adj
## 
## ***VECTORS
## 
##                     Dim1     Dim2     Dim3     r2 Pr(>r)  
## pH_H2O           0.71582 -0.34967  0.60442 0.5947  0.012 *
## C_g_per_kg      -0.70297  0.28453 -0.65182 0.6894  0.012 *
## N_gkg           -0.74553  0.24218 -0.62092 0.6801  0.012 *
## TP_gkg          -0.91468 -0.33863 -0.22064 0.5108  0.012 *
## depth_numerical  0.82942  0.16519  0.53364 0.5935  0.012 *
## DOC_mgkg        -0.57514  0.48141 -0.66141 0.5386  0.012 *
## Pinorg_mgkg      0.12414 -0.67859  0.72395 0.1308  0.024 *
## Porg_mgkg       -0.84201  0.03811 -0.53812 0.6442  0.012 *
## log_root        -0.70696  0.52696 -0.47172 0.5404  0.012 *
## C_per_N         -0.78189  0.17543 -0.59823 0.4935  0.012 *
## Feox_mmolkg     -0.67315  0.42760 -0.60335 0.5371  0.012 *
## Alox_mmolkg     -0.40124  0.43448 -0.80637 0.5726  0.012 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## ***FACTORS:
## 
## Centroids:
##                            Dim1    Dim2    Dim3
## sample_typeforest        0.0679  0.1867  0.1190
## sample_typemeadow       -0.0229  0.1780 -0.0916
## sample_typeorganic      -0.0098 -0.1293  0.0392
## sample_typeconventional  0.0006 -0.1050  0.0041
## 
## Goodness of fit:
##                 r2 Pr(>r)    
## sample_type 0.1997  0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## 1 observation deleted due to missingness
# first categorial variables
env.scores_cat13 <- as.data.frame(scores(pcoa.env13, display = "factors"))*0.25
env.scores_cat13 <- cbind(env.scores_cat13, env.variables = rownames(env.scores_cat13)) #and then gives them their names

env.scores_cat13 <- cbind(env.scores_cat13, pval = pcoa.env13$factors$pvals) # add pvalues to dataframe
sig.env.scores_cat13 <- subset(env.scores_cat13, pval<=0.05) #subset data to show variables significant at 0.05



# then continous variables
env.scores_cont13 <- as.data.frame(scores(pcoa.env13, display = "vectors"))*0.25
env.scores_cont13 <- cbind(env.scores_cont13, env.variables = rownames(env.scores_cont13)) #and then gives them their names

env.scores_cont13 <- cbind(env.scores_cont13, pval = pcoa.env13$vectors$pvals) # add pvalues to dataframe
sig.env.scores_cont13 <- subset(env.scores_cont13, pval<=0.05) #subset data to show variables significant at 0.05
# The species scores of the 0.1% most abundant and 100% of the best fitting OTUs
ordis13 <- ordiselect(OTU, species.scores13, ablim = 0.001, fitlim = 1, choices = c(1,3), method = "axes", env = pcoa.env13)
## [1] "21 species selected (0.1% of total number of species)."
## [1] "All species selected which belong to the 0.1% most abundant species."
ordis13.species.scores <- species.scores13[ordis13, ]
ordis13.species.scores <- cbind(ordis13.species.scores, Species = rownames(ordis13.species.scores)) #add species names to dataframe
# change into sp names
try <- merge(data.frame(ordis13.species.scores), data.frame(OTU.sp), 
             by = 0,  all = FALSE)
#add row names
samp2 <- try[,-1]
rownames(samp2) <- try[,1]
try <- samp2
rm(samp2)
# remove the "Species"
try <- try[ , -c(2, 4)]
# remove "try"
ordis13.species.scores <- try
rm(try)

head(ordis13.species.scores)
##                          V1                   V3                        species
## OTU12776  0.126946387694976 -0.00534028987033588             Clonostachys_rosea
## OTU139    0.200755588616414  -0.0370071455091096     Entomortierella_parvispora
## OTU13985 -0.237582025036352  -0.0956487846132982            Saitozyma_podzolica
## OTU19296 -0.295916773006835    0.120579023660146      Cladorrhinum_unclassified
## OTU20886  -0.25280082681134    0.026574191820344 Paraphaeosphaeria_unclassified
## OTU23599  -0.22805912031261   -0.175344391490801       Paraphaeosphaeria_viciae
##               OTU
## OTU12776 OTU12776
## OTU139     OTU139
## OTU13985 OTU13985
## OTU19296 OTU19296
## OTU20886 OTU20886
## OTU23599 OTU23599
ordis13.species.scores$V1 <- as.numeric(ordis13.species.scores$V1)
ordis13.species.scores$V3 <- as.numeric(ordis13.species.scores$V3)
#First lets run the PCoA in with phyloseq to get the axis percentages

GP.ord <- ordinate(ps_RA, "PCoA", "bray", k = 3)
p2 = plot_ordination(ps_RA, GP.ord, type="samples", axes = c(1, 3), color="sample_type", shape="depth")
p2

Remember to change the axis percentages accordingly!!

pcoa.plot <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim3, colour = factor(site.scrs$soil_type), shape = factor(site.scrs$depth)), size = 6, alpha = 0.6, stroke = 1.5) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.position = "right", legend.text = element_text(size = 12), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette) + labs(y = "PC3 (5.8%)", x = "PC1 (20.0%)")

pcoa.plot

# modify the species names
ordis13.species.scores$species <- gsub("_unclassified","",as.character(ordis13.species.scores$species))
pcoa.plot_OTU <- pcoa.plot + ggrepel::geom_text_repel(data = ordis13.species.scores, aes(x=V1,y=V3,label=species), alpha=0.7, cex = 3.5, direction = "both", segment.size = 0.2, max.overlaps = Inf) + theme(legend.text = element_text(size = 12)) + theme(legend.position = c(0.85, 0.8), legend.text = element_text(size = 12))# + theme(legend.position="none")# if problems, this might help
pcoa.plot_OTU

In the final figure I will not have the env. variables, but let’s see how it looks anyhow

pcoa.plot_OTU +
  geom_segment(aes(x = 0, y = 0, xend = Dim1, yend = Dim3), data = sig.env.scores_cont13, size =1, alpha = 0.5, colour = "grey30") + ggrepel::geom_text_repel(data = sig.env.scores_cont13, aes(x = Dim1, y = Dim3), colour = "blue", fontface = "bold", label = sig.env.scores_cont13$env.variables, max.overlaps=Inf, direction = "y", segment.size = 0.2, box.padding = unit(0.5, "lines"), point.padding = (1), force = 1, max.time = 30, nudge_y = 0.01, nudge_x = 0.01) + theme(legend.position = c(0.91, 0.8), legend.text = element_text(size = 12))  #+ theme(legend.position = "none")


RESULTS STEP 3: PERMANOVA analysis at OTU level

Here I do permutational analysis of variance or PERMANOVA. With PERMANOVA, I want to check how much the main treatment factors, management type (here sample_type) and soil layer (depth), are responsible for differences in fungal communities. In addition, I will check how soil layers differ within management type (4.5) and in which soil layers we see a management type effect (4.6)

1. load packages and data

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("pairwiseAdonis")


setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata 
ps_RA <- microbiome::transform(ps, "compositional")
otu <- abundances(ps_RA)
meta <- meta(ps_RA)

2. Calculate Bray-Curtis (dis)similarities

ps_RA_bray <- phyloseq::distance(ps_RA, method = "bray")

3. check homogenity of variances

Check that variance homogeneity assumptions hold (to ensure the reliability of the results). If groups have signif. different spreads the permanova result may be potentially explained by that, rtaher than the groups.

Betadisper first calculates the average distance of group members to the group centroid in multivariate space (generated by a distance matrix). Then, an ANOVA is done to test if the dispersions (variances) of groups are different.

3.1 for management type

anova(betadisper(ps_RA_bray, meta$sample_type))
## Analysis of Variance Table
## 
## Response: Distances
##            Df  Sum Sq  Mean Sq F value Pr(>F)
## Groups      3 0.05437 0.018125   1.247 0.2953
## Residuals 136 1.97679 0.014535

We see that the ANOVA p-value is not significant meaning that the homogeneity of variance assumption is met

3.2 for depth

anova(betadisper(ps_RA_bray, meta$depth))
## Analysis of Variance Table
## 
## Response: Distances
##            Df  Sum Sq  Mean Sq F value    Pr(>F)    
## Groups      4 0.41023 0.102556   9.276 1.184e-06 ***
## Residuals 135 1.49258 0.011056                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We see that the ANOVA p-value is highly significant meaning that homogeneity of variance assumption is NOT met

3.2.1 post hoc

I’ll do post hoc analysis with Tukey’s test to see which groups differ in relation to their variances

TukeyHSD(betadisper(ps_RA_bray, meta$depth))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = distances ~ group, data = df)
## 
## $group
##                         diff          lwr        upr     p adj
## 10...20-0...10  -0.028389112 -0.106091062 0.04931284 0.8502993
## 20...30-0...10   0.041806442 -0.035895508 0.11950839 0.5722606
## 30...40-0...10   0.051717780 -0.025984170 0.12941973 0.3549246
## 40...-0...10     0.130756030  0.053054080 0.20845798 0.0000749
## 20...30-10...20  0.070195554 -0.007506396 0.14789750 0.0971647
## 30...40-10...20  0.080106892  0.002404942 0.15780884 0.0397902
## 40...-10...20    0.159145142  0.081443192 0.23684709 0.0000009
## 30...40-20...30  0.009911338 -0.067790612 0.08761329 0.9966478
## 40...-20...30    0.088949588  0.011247638 0.16665154 0.0161603
## 40...-30...40    0.079038250  0.001336300 0.15674020 0.0440806

Dispersions differ significantly between 40… and all other, and between 30…40 and 10…20

The latter is not a problem at all, because I am not interested of comparing layers if they are not consecutive, but I will keep in mind, that the consecutive layers 30-40 cmd and 40-80 cm do not have similar dispersions.

4. PERMANOVA

First, I will do PERMANOVA so that I include all management types (later without forest)

4.1 check if depth or management have larger effect

# first with just soil type and strata option
adonis2(formula = ps_RA_bray~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks:  strata 
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
##              Df SumOfSqs      R2      F Pr(>F)    
## sample_type   3    5.591 0.10399 5.2615  1e-04 ***
## Residual    136   48.175 0.89601                  
## Total       139   53.766 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# then with just depth and strata option
adonis2(formula = ps_RA_bray~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks:  strata 
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
##           Df SumOfSqs     R2      F Pr(>F)    
## depth      4    9.705 0.1805 7.4335  1e-04 ***
## Residual 135   44.061 0.8195                  
## Total    139   53.766 1.0000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Depth has a larger effect. So, let’s put it first in the model

4.2 PERMANOVA full model

For the full model it matters which “by” option we choose. When by=“terms” significance for each term is calculated sequentially from first to last, so that the order of terms matter. We will use this, because with sequential analysis we will get R2 values that sum up to 1, and will also get the significance and R2 values for each interaction term separately rather than for the interaction alone.

final <- adonis2(formula = ps_RA_bray ~ depth * sample_type, data = meta, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ depth * sample_type, data = meta, permutations = 9999, method = "bray", by = "terms")
##                    Df SumOfSqs      R2      F Pr(>F)    
## depth               4    9.705 0.18050 9.1871  1e-04 ***
## sample_type         3    5.591 0.10399 7.0576  1e-04 ***
## depth:sample_type  12    6.780 0.12611 2.1396  1e-04 ***
## Residual          120   31.690 0.58940                  
## Total             139   53.766 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.2.1 export the result

#write.csv2(final, "\C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile\\permanova_soiltype_and_depth.csv")

4.2.2 PERMANOVA full model without forest

I will not use this, rather the one above with forest

# subset samples
ps_RA <- microbiome::transform(ps, "compositional")
ps_x <- subset_samples(ps_RA, sample_type != "forest")
meta_subset <- meta(ps_x)


# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
              MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 19820 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 19820 taxa by 7 taxonomic ranks ]
b <- phyloseq::distance(ps_x, method = "bray")


set.seed(777)

final <- adonis2(formula = b ~ depth*sample_type, data = meta_subset, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = b ~ depth * sample_type, data = meta_subset, permutations = 9999, method = "bray", by = "terms")
##                    Df SumOfSqs      R2      F Pr(>F)    
## depth               4    9.800 0.21235 9.7433  1e-04 ***
## sample_type         2    3.927 0.08509 7.8083  1e-04 ***
## depth:sample_type   8    4.763 0.10320 2.3676  1e-04 ***
## Residual          110   27.660 0.59936                  
## Total             124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.3 Pairwise PERMANOVA: management type

Pairwise PERMANOVA I will only do so that forest is excluded due to too few replicates for forest. But I will not use these result, this is just to check

set.seed(777)
pair.mod<-pairwise.adonis(b, factors=meta_subset$sample_type)
pair.mod
##                     pairs Df SumsOfSqs  F.Model         R2 p.value p.adjusted
## 1  conventional vs meadow  1  2.351620 6.722317 0.07492358   0.001      0.003
## 2 conventional vs organic  1  1.000537 2.947677 0.03429618   0.010      0.030
## 3       meadow vs organic  1  2.574066 7.371501 0.08634616   0.001      0.003
##   sig
## 1   *
## 2   .
## 3   *

Each treatment differ from the other treatments at the 0.05 confidence level.

4.5 Depth effect in each management type

I will do pairwise permanova analysis of depth for all management types separately, except for forest which has too few replicates

4.5.1 Meadow

# subset samples
x <- "meadow"
ps_RA_subset <- subset_samples(ps_RA, sample_type == x)
meta_subset <- subset(meta, sample_type == x)

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 11032 taxa and 40 samples ]
## sample_data() Sample Data:       [ 40 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 11032 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")

set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$depth)
pair.mod
##                 pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted sig
## 1   0...10 vs 10...20  1 0.6686591 4.418415 0.2398912   0.001       0.01   *
## 2   0...10 vs 20...30  1 1.2787641 6.259368 0.3089617   0.001       0.01   *
## 3   0...10 vs 30...40  1 2.1571845 9.795664 0.4116575   0.001       0.01   *
## 4     0...10 vs 40...  1 1.6755335 5.447545 0.2801148   0.002       0.02   .
## 5  10...20 vs 20...30  1 0.3728923 1.802396 0.1140584   0.048       0.48    
## 6  10...20 vs 30...40  1 1.6735250 7.511019 0.3491708   0.001       0.01   *
## 7    10...20 vs 40...  1 1.5401662 4.965605 0.2618216   0.002       0.02   .
## 8  20...30 vs 30...40  1 0.9160107 3.321638 0.1917624   0.001       0.01   *
## 9    20...30 vs 40...  1 1.0302087 2.837037 0.1684998   0.001       0.01   *
## 10   30...40 vs 40...  1 0.7004325 1.847860 0.1166000   0.012       0.12

4.5.2 Organic

# subset samples
x <- "organic"
ps_RA_subset <- subset_samples(ps_RA, sample_type == x)
meta_subset <- subset(meta, sample_type == x)

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 14151 taxa and 40 samples ]
## sample_data() Sample Data:       [ 40 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 14151 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")

set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$depth)
pair.mod
##                 pairs Df SumsOfSqs   F.Model         R2 p.value p.adjusted sig
## 1   0...10 vs 10...20  1 0.1366228  0.906114 0.06078808   0.443       1.00    
## 2   0...10 vs 20...30  1 0.4849847  2.278740 0.13998260   0.007       0.07    
## 3   0...10 vs 30...40  1 2.0284059  8.323851 0.37286806   0.001       0.01   *
## 4     0...10 vs 40...  1 2.3642953 11.171511 0.44381567   0.002       0.02   .
## 5  10...20 vs 20...30  1 0.4214687  2.119591 0.13149163   0.016       0.16    
## 6  10...20 vs 30...40  1 2.1185424  9.223078 0.39715141   0.001       0.01   *
## 7    10...20 vs 40...  1 2.4397574 12.343810 0.46856585   0.002       0.02   .
## 8  20...30 vs 30...40  1 1.0503864  3.600278 0.20455802   0.001       0.01   *
## 9    20...30 vs 40...  1 1.4017114  5.397392 0.27825350   0.001       0.01   *
## 10   30...40 vs 40...  1 0.5095377  1.753656 0.11131738   0.041       0.41

4.5.2 Conventional

# subset samples
x <- "conventional"
ps_RA_subset <- subset_samples(ps_RA, sample_type == x)
meta_subset <- subset(meta, sample_type == x)

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 13863 taxa and 45 samples ]
## sample_data() Sample Data:       [ 45 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 13863 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")

set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$depth)
pair.mod
##                 pairs Df SumsOfSqs    F.Model         R2 p.value p.adjusted sig
## 1   0...10 vs 10...20  1 0.1170223  0.8087735 0.04811615   0.757       1.00    
## 2   0...10 vs 20...30  1 0.7762987  3.5720248 0.18250665   0.003       0.03   .
## 3   0...10 vs 30...40  1 2.1200130  8.7764519 0.35422553   0.001       0.01   *
## 4     0...10 vs 40...  1 1.6494331  5.3721690 0.25136283   0.001       0.01   *
## 5  10...20 vs 20...30  1 0.7642509  4.0496264 0.20198014   0.007       0.07    
## 6  10...20 vs 30...40  1 2.2368859 10.5042323 0.39632283   0.001       0.01   *
## 7    10...20 vs 40...  1 1.8451677  6.6271158 0.29288381   0.001       0.01   *
## 8  20...30 vs 30...40  1 0.6735640  2.3585234 0.12847021   0.025       0.25    
## 9    20...30 vs 40...  1 0.7552291  2.1512623 0.11851861   0.013       0.13    
## 10   30...40 vs 40...  1 0.4996687  1.3314104 0.07682066   0.105       1.00

4.6 Management effect at different depths?

I will analyse these without forest as forest has too few replicates

4.6.1 1st layer

# define soil layer to be analysed
x <- "0...10"

# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")

ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 13638 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 13638 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")


set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
##                     pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted
## 1  conventional vs meadow  1 1.5452073 9.547816 0.3889477   0.001      0.003
## 2 conventional vs organic  1 0.7360756 4.347365 0.2247006   0.001      0.003
## 3       meadow vs organic  1 1.5542597 9.915270 0.4146000   0.002      0.006
##   sig
## 1   *
## 2   *
## 3   *

4.6.2 2nd layer

# subset samples
x <- "10...20"


# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")

ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 15128 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 15128 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")



set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
##                     pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted
## 1  conventional vs meadow  1 1.3202292 9.871317 0.3968956   0.001      0.003
## 2 conventional vs organic  1 0.6464922 5.141142 0.2552557   0.001      0.003
## 3       meadow vs organic  1 1.3555168 9.325285 0.3997930   0.002      0.006
##   sig
## 1   *
## 2   *
## 3   *

4.6.3 3rd layer

# subset samples
x <- "20...30"

# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")

ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 14304 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 14304 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")



set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
##                     pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted
## 1  conventional vs meadow  1 0.8814463 3.381681 0.1839702   0.001      0.003
## 2 conventional vs organic  1 0.4094517 1.567927 0.0946363   0.114      0.342
## 3       meadow vs organic  1 0.8348207 3.206261 0.1863427   0.002      0.006
##   sig
## 1   *
## 2    
## 3   *

4.6.4 4th layer

# subset samples
x <- "30...40"

# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")

ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 5256 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 5256 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")



set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
##                     pairs Df SumsOfSqs   F.Model         R2 p.value p.adjusted
## 1  conventional vs meadow  1 0.7196829 2.3881246 0.13734228   0.006      0.018
## 2 conventional vs organic  1 0.2610497 0.8266673 0.05223256   0.707      1.000
## 3       meadow vs organic  1 0.5459411 1.7774404 0.11265708   0.012      0.036
##   sig
## 1   .
## 2    
## 3   .

4.6.5 5th layer

# subset samples
x <- "40..."

# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")

ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2487 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 2487 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")


set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
##                     pairs Df SumsOfSqs  F.Model         R2 p.value p.adjusted
## 1  conventional vs meadow  1 0.4663823 1.030147 0.06426309   0.353      1.000
## 2 conventional vs organic  1 0.7337456 2.062743 0.12089163   0.011      0.033
## 3       meadow vs organic  1 1.0846039 2.992358 0.17610025   0.002      0.006
##   sig
## 1    
## 2   .
## 3   *

RESULTS STEP 4: PERMANOVA analysis with soil properties

PERMANOVA with soil properties will be done with only meadow, organic and conventional treatments excluding forest

1. load packages and data

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("pairwiseAdonis")

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata 
ps_RA <- microbiome::transform(ps, "compositional")
ps_RA_nf <- subset_samples(ps_RA, sample_type!="forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_nf),
              MARGIN = ifelse(taxa_are_rows(ps_RA_nf), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_nf = prune_taxa((prev0 > 0), ps_RA_nf)
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 19820 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 19820 taxa by 7 taxonomic ranks ]
otu <- abundances(ps_RA_nf)
meta <- meta(ps_RA_nf)

Note: adonis cannot handle or account for NA or blanks in your data. Use na.omit(meta) %>% before each run where such variables are used where NAs exist

2. Calculate Bray-Curtis (dis)similarities

ps_RA_bray <- phyloseq::distance(ps_RA_nf, method = "bray")

4.2 PERMANOVA with soil properties

I will use the following soil properties

“log_root”
“pH_H2O”
“C_g_per_kg”
“N_gkg”
“TP_gkg”
“Alox_mmolkg”
“Feox_mmolkg”
“PH2O_mgkg”
“Porg_mgkg”
“DOC_mgkg”
“Pinorg_mgkg”
“C_per_N”

final <- adonis2(formula = ps_RA_bray ~ log_root, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ log_root, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)    
## Model      1    5.369 0.11634 16.194  1e-04 ***
## Residual 123   40.780 0.88366                  
## Total    124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ pH_H2O, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ pH_H2O, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs     R2      F Pr(>F)    
## Model      1    6.041 0.1309 18.526  1e-04 ***
## Residual 123   40.108 0.8691                  
## Total    124   46.149 1.0000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ C_g_per_kg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ C_g_per_kg, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs      R2    F Pr(>F)    
## Model      1    6.482 0.14046 20.1  1e-04 ***
## Residual 123   39.667 0.85954                
## Total    124   46.149 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ N_gkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ N_gkg, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)    
## Model      1    6.647 0.14404 20.698  1e-04 ***
## Residual 123   39.502 0.85596                  
## Total    124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ TP_gkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ TP_gkg, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)    
## Model      1    5.559 0.12046 16.846  1e-04 ***
## Residual 123   40.590 0.87954                  
## Total    124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ Alox_mmolkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ Alox_mmolkg, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)    
## Model      1    3.713 0.08045 10.762  1e-04 ***
## Residual 123   42.437 0.91955                  
## Total    124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ Feox_mmolkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ Feox_mmolkg, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)    
## Model      1    4.853 0.10517 14.456  1e-04 ***
## Residual 123   41.296 0.89483                  
## Total    124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ DOC_mgkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ DOC_mgkg, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)    
## Model      1    5.300 0.11484 15.957  1e-04 ***
## Residual 123   40.850 0.88516                  
## Total    124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ Pinorg_mgkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ Pinorg_mgkg, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)  
## Model      1    0.699 0.01515 1.8918 0.0305 *
## Residual 123   45.450 0.98485                
## Total    124   46.149 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ C_per_N, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ C_per_N, data = meta, permutations = 9999, method = "bray")
##           Df SumOfSqs      R2      F Pr(>F)    
## Model      1    4.909 0.10637 14.641  1e-04 ***
## Residual 123   41.240 0.89363                  
## Total    124   46.149 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

check if missing values

colSums(is.na(meta))
##          sampleID              plot sampling_position             depth 
##                 0                 0                 0                 0 
##   depth_numerical        vegetation       sample_type          root_mgg 
##                 0                 0                 0                 0 
##            pH_H2O           EC_uScm        C_g_per_kg             N_gkg 
##                 0                 0                 0                 0 
##            TP_gkg       Alox_mmolkg       Feox_mmolkg     oxides_mmolkg 
##                 0                 0                 0                 0 
##         PH2O_mgkg         Porg_mgkg          DOC_mgkg       Pinorg_mgkg 
##                 1                 1                 0                 0 
##           C_per_N          observed             chao1           shannon 
##                 0                 0                 0                 0 
##      observed_sng         chao1_sng       shannon_sng          log_root 
##                 0                 0                 0                 0

These are NA:

Porg_mgkg for sample NG2A2_30to40

PH2O_mgkg for sample NG2B3_0to10

m <- meta %>% drop_na(Porg_mgkg)

x <- subset_samples(ps_RA_nf, sampleID!="NG2A2_30to40")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(x),
              MARGIN = ifelse(taxa_are_rows(x), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
x = prune_taxa((prev0 > 0), x)
x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 19817 taxa and 124 samples ]
## sample_data() Sample Data:       [ 124 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 19817 taxa by 7 taxonomic ranks ]
otu <- abundances(x)

b <- phyloseq::distance(x, method = "bray")

final <- adonis2(formula = b ~ Porg_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = b ~ Porg_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
##            Df SumOfSqs      R2      F Pr(>F)    
## Porg_mgkg   1    6.645 0.14528 20.736  1e-04 ***
## Residual  122   39.095 0.85472                  
## Total     123   45.739 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m <- meta %>% drop_na(PH2O_mgkg)

x <- subset_samples(ps_RA_nf, sampleID!="NG2B3_0to10")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(x),
              MARGIN = ifelse(taxa_are_rows(x), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
x = prune_taxa((prev0 > 0), x)
x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 19819 taxa and 124 samples ]
## sample_data() Sample Data:       [ 124 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 19819 taxa by 7 taxonomic ranks ]
otu <- abundances(x)

b <- phyloseq::distance(x, method = "bray")

final <- adonis2(formula = b ~ PH2O_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = b ~ PH2O_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
##            Df SumOfSqs      R2      F Pr(>F)    
## PH2O_mgkg   1    1.935 0.04229 5.3866  1e-04 ***
## Residual  122   43.824 0.95771                  
## Total     123   45.759 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

PERMANOVA with all soil properties in one model

remove these samples

Porg_mgkg (NG2A2_30to40) PH2O_mgkg (NG2B3_0to10)

m <- meta %>% drop_na(PH2O_mgkg)
m <- m %>% drop_na(Porg_mgkg)

ps_x <- subset_samples(ps_RA_nf, sampleID!="NG2B3_0to10")
ps_x <- subset_samples(ps_x, sampleID!="NG2A2_30to40")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
              MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 19816 taxa and 123 samples ]
## sample_data() Sample Data:       [ 123 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 19816 taxa by 7 taxonomic ranks ]
b <- phyloseq::distance(ps_x, method = "bray")

final <- adonis2(formula = b ~ log_root + pH_H2O + C_g_per_kg + N_gkg + TP_gkg + Alox_mmolkg + Feox_mmolkg + PH2O_mgkg + Porg_mgkg + DOC_mgkg + Pinorg_mgkg + C_per_N, data = m, permutations = 9999, method = "bray", by = NULL)

final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = b ~ log_root + pH_H2O + C_g_per_kg + N_gkg + TP_gkg + Alox_mmolkg + Feox_mmolkg + PH2O_mgkg + Porg_mgkg + DOC_mgkg + Pinorg_mgkg + C_per_N, data = m, permutations = 9999, method = "bray", by = NULL)
##           Df SumOfSqs      R2      F Pr(>F)    
## Model     12   15.333 0.33811 4.6826  1e-04 ***
## Residual 110   30.016 0.66189                  
## Total    122   45.350 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

PERMANOVA for each soil layer separatley

run for all layers and all of these separately:

# Define the environmental variables as a character vector, not as a factor
env <- c("log_root", "pH_H2O", "C_g_per_kg", "N_gkg", "TP_gkg", "Alox_mmolkg", "Feox_mmolkg", 
         "PH2O_mgkg", "Porg_mgkg", "DOC_mgkg", "Pinorg_mgkg", "C_per_N")

# Convert the 'depth' column to a factor
meta$depth <- as.factor(meta$depth)

# Initialize an empty list to store the results
adonis_results <- list()

# Loop over each depth and environmental variable
for (i in levels(meta$depth)) {
  for (j in env) {
    # Subset samples
    ps_x <- subset_samples(ps_RA, sample_type != "forest")
    ps_x <- subset_samples(ps_x, sampleID != "NG2B3_0to10")
    ps_x <- subset_samples(ps_x, sampleID != "NG2A2_30to40")
    ps_x <- subset_samples(ps_x, depth == i)
    meta_subset <- meta(ps_x)

    # Define prevalence of each taxa (in how many samples did each taxa appear at least once)
    prev0 <- apply(X = otu_table(ps_x),
                   MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
                   FUN = function(x) { sum(x > 0) })

    # Execute prevalence filter, using `prune_taxa()` function
    ps_x <- prune_taxa((prev0 > 0), ps_x)

    # Calculate Bray-Curtis distance
    b <- phyloseq::distance(ps_x, method = "bray")

    # Run adonis2 analysis
    formula <- as.formula(paste("b ~", j))
    adonis_result <- adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
    
    # Store the result in the list with a descriptive name
    result_name <- paste("depth", i, "env", j, sep = "_")
    adonis_results[[result_name]] <- adonis_result
  }
}

# View the list of results
adonis_results
## $depth_0...10_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.7966 0.13653 3.4787 0.0014 **
## Residual 22   5.0376 0.86347                 
## Total    23   5.8342 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs     R2      F Pr(>F)   
## Model     1   0.6628 0.1136 2.8195 0.0054 **
## Residual 22   5.1714 0.8864                 
## Total    23   5.8342 1.0000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.5213 0.26075 7.7599  1e-04 ***
## Residual 22   4.3129 0.73925                  
## Total    23   5.8342 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.4712 0.25216 7.4181  1e-04 ***
## Residual 22   4.3630 0.74784                  
## Total    23   5.8342 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.2863 0.04908 1.1354 0.2882
## Residual 22   5.5479 0.95092              
## Total    23   5.8342 1.00000              
## 
## $depth_0...10_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.0063 0.17248 4.5855  1e-04 ***
## Residual 22   4.8279 0.82752                  
## Total    23   5.8342 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.7390 0.12666 3.1908 0.0025 **
## Residual 22   5.0952 0.87334                 
## Total    23   5.8342 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.5390 0.09239 2.2396 0.0204 *
## Residual 22   5.2951 0.90761                
## Total    23   5.8342 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   0.7905 0.13549 3.4479  9e-04 ***
## Residual 22   5.0437 0.86451                  
## Total    23   5.8342 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.4018 0.24027 6.9575  1e-04 ***
## Residual 22   4.4324 0.75973                  
## Total    23   5.8342 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_0...10_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3372 0.05779 1.3495 0.1689
## Residual 22   5.4970 0.94221              
## Total    23   5.8342 1.00000              
## 
## $depth_0...10_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.1766 0.20168 5.5579  1e-04 ***
## Residual 22   4.6575 0.79832                  
## Total    23   5.8342 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.7280 0.14084 3.7704 0.0013 **
## Residual 23   4.4407 0.85916                 
## Total    24   5.1686 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   0.9411 0.18209 5.1204  2e-04 ***
## Residual 23   4.2275 0.81791                  
## Total    24   5.1686 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.2399 0.23989 7.2586  1e-04 ***
## Residual 23   3.9287 0.76011                  
## Total    24   5.1686 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   0.9946 0.19242 5.4802  1e-04 ***
## Residual 23   4.1741 0.80758                  
## Total    24   5.1686 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.2530 0.04895 1.1838  0.243
## Residual 23   4.9156 0.95105              
## Total    24   5.1686 1.00000              
## 
## $depth_10...20_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.2957 0.25069 7.6951  1e-04 ***
## Residual 23   3.8729 0.74931                  
## Total    24   5.1686 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.1620 0.22483 6.6708  1e-04 ***
## Residual 23   4.0066 0.77517                  
## Total    24   5.1686 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.3596 0.06957 1.7196 0.0688 .
## Residual 23   4.8091 0.93043                
## Total    24   5.1686 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.2818 0.05453 1.3265 0.1728
## Residual 23   4.8868 0.94547              
## Total    24   5.1686 1.00000              
## 
## $depth_10...20_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   1.1131 0.21535 6.3126  1e-04 ***
## Residual 23   4.0555 0.78465                  
## Total    24   5.1686 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.3936 0.07615 1.8959   0.05 *
## Residual 23   4.7750 0.92385                
## Total    24   5.1686 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_10...20_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.5862 0.11342 2.9425  0.005 **
## Residual 23   4.5824 0.88658                 
## Total    24   5.1686 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.7395 0.10346 2.6541 0.0029 **
## Residual 23   6.4086 0.89654                 
## Total    24   7.1482 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     1   0.8450 0.11821 3.0833  2e-04 ***
## Residual 23   6.3032 0.88179                  
## Total    24   7.1482 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.7142 0.09991 2.5531 0.0049 **
## Residual 23   6.4340 0.90009                 
## Total    24   7.1482 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.6962 0.09739 2.4816 0.0067 **
## Residual 23   6.4520 0.90261                 
## Total    24   7.1482 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.4651 0.06506 1.6005 0.0711 .
## Residual 23   6.6831 0.93494                
## Total    24   7.1482 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.6839 0.09567 2.4332 0.0059 **
## Residual 23   6.4643 0.90433                 
## Total    24   7.1482 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.7618 0.10658 2.7437 0.0021 **
## Residual 23   6.3863 0.89342                 
## Total    24   7.1482 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.4951 0.06927 1.7117 0.0493 *
## Residual 23   6.6530 0.93073                
## Total    24   7.1482 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.6443 0.09014 2.2786 0.0105 *
## Residual 23   6.5038 0.90986                
## Total    24   7.1482 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.6601 0.09235 2.3401 0.0085 **
## Residual 23   6.4881 0.90765                 
## Total    24   7.1482 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_20...30_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3030 0.04239 1.0182 0.4027
## Residual 23   6.8451 0.95761              
## Total    24   7.1482 1.00000              
## 
## $depth_20...30_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.4538 0.06348 1.5591 0.0878 .
## Residual 23   6.6944 0.93652                
## Total    24   7.1482 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_30...40_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.5268 0.07033 1.6643 0.0328 *
## Residual 22   6.9641 0.92967                
## Total    23   7.4909 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_30...40_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2     F Pr(>F)  
## Model     1   0.4728 0.06311 1.482 0.0711 .
## Residual 22   7.0181 0.93689               
## Total    23   7.4909 1.00000               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_30...40_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2    F Pr(>F)
## Model     1   0.2850 0.03804 0.87 0.5167
## Residual 22   7.2060 0.96196            
## Total    23   7.4909 1.00000            
## 
## $depth_30...40_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.2756 0.03678 0.8402 0.6539
## Residual 22   7.2154 0.96322              
## Total    23   7.4909 1.00000              
## 
## $depth_30...40_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.4086 0.05455 1.2693 0.1997
## Residual 22   7.0823 0.94545              
## Total    23   7.4909 1.00000              
## 
## $depth_30...40_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.5886 0.07858 1.8761 0.0193 *
## Residual 22   6.9023 0.92142                
## Total    23   7.4909 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_30...40_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     1   0.5944 0.07936 1.8963  0.018 *
## Residual 22   6.8965 0.92064                
## Total    23   7.4909 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_30...40_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2     F Pr(>F)
## Model     1   0.3054 0.04077 0.935 0.5159
## Residual 22   7.1855 0.95923             
## Total    23   7.4909 1.00000             
## 
## $depth_30...40_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2     F Pr(>F)  
## Model     1   0.5900 0.07877 1.881 0.0144 *
## Residual 22   6.9009 0.92123               
## Total    23   7.4909 1.00000               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_30...40_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3720 0.04966 1.1495 0.3077
## Residual 22   7.1189 0.95034              
## Total    23   7.4909 1.00000              
## 
## $depth_30...40_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.2953 0.03942 0.9028 0.5857
## Residual 22   7.1956 0.96058              
## Total    23   7.4909 1.00000              
## 
## $depth_30...40_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.4801 0.06409 1.5066 0.1147
## Residual 22   7.0108 0.93591              
## Total    23   7.4909 1.00000              
## 
## $depth_40..._env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.4089 0.04044 0.9693 0.4857
## Residual 23   9.7019 0.95956              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.5118 0.05062 1.2263 0.1537
## Residual 23   9.5990 0.94938              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.4801 0.04748 1.1465 0.2979
## Residual 23   9.6307 0.95252              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3803 0.03761 0.8989 0.6571
## Residual 23   9.7305 0.96239              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3102 0.03068 0.7281 0.9339
## Residual 23   9.8005 0.96932              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3621 0.03582 0.8544 0.7201
## Residual 23   9.7487 0.96418              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     1   0.8108 0.08019 2.0052 0.0017 **
## Residual 23   9.3000 0.91981                 
## Total    24  10.1108 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $depth_40..._env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.4612 0.04561 1.0992 0.2755
## Residual 23   9.6496 0.95439              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3897 0.03854 0.9221 0.5867
## Residual 23   9.7211 0.96146              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.3038 0.03005 0.7125 0.8952
## Residual 23   9.8070 0.96995              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.2886 0.02854 0.6757 0.9697
## Residual 23   9.8222 0.97146              
## Total    24  10.1108 1.00000              
## 
## $depth_40..._env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     1   0.5202 0.05145 1.2476 0.1383
## Residual 23   9.5906 0.94855              
## Total    24  10.1108 1.00000

RESULTS STEPS 5 and 6:HEATMAPS with genera and COMPOSITION PLOTS for phyla, class and FUNGuild

A) HEATMAP for meadow, organic and conventional

1. load packages and data

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")


setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata 
ps_RA <- microbiome::transform(ps, "compositional")
ps_RA_nf <- subset_samples(ps_RA, sample_type!="forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_nf),
              MARGIN = ifelse(taxa_are_rows(ps_RA_nf), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_nf = prune_taxa((prev0 > 0), ps_RA_nf)
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 19820 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 19820 taxa by 7 taxonomic ranks ]
meta_nf <- meta(ps_RA_nf)

2. Overview of the heatmap analysis

I will do heatmap for fungal genera using only meadow, organic and conventional soils (without forest). I will use RA values For the final heatmap figure the RA values will be standardizes (z-transformed)

  1. First pick the 20 most abundant genera in each soil type depth
  2. Do multiple testing between sample_type_depths that make sense (do not test between e.g. meadow deep and organic topsoil)
  3. Make a HEATMAP of the significant taxa only

!! NOTE!! I will also include taxa that are not classified at genus level in the heatmap

3. at genus level

ps_genus <- aggregate_rare(ps, level = 'genus', detection = 0/100, prevalence = 0/140)
ps_genus
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 943 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 943 taxa by 1 taxonomic ranks ]
ps_genus_RA <- microbiome::transform(ps_genus, "compositional")

ps_genus_RA_pruned <- subset_samples(ps_genus_RA, sample_type!="forest")
ps_genus_RA_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 943 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 943 taxa by 1 taxonomic ranks ]

4. Pick 20 most abund taxa in each soil management layer

# sample wise filtering according to most abund. genera

# Initialize an empty list to store the taxa
abund.taxa <- list()

for (i in meta_nf$sample_type) {
  for (j in meta_nf$depth) {
    x <- sample_names(sample_data(ps_genus_RA_pruned)[sample_data(ps_genus_RA_pruned)$sample_type == i & sample_data(ps_genus_RA_pruned)$depth == j,])

# Calculate taxa mean of the selected samples
top20 <- head(sort(rowMeans(otu_table(ps_genus_RA_pruned)[,x]), decreasing = TRUE), 20)

result_name <- paste("sample_type", i, "depth", j, sep = "_")

abund.taxa[[result_name]] <- top20
  }
}

5. Combine picked taxa

management_layer <- c(names(abund.taxa))

all_top20 <- c()

for (i in management_layer) {
  top20 <- c(names(abund.taxa[[i]]))
  all_top20 <- c(all_top20, top20)

}

all_top20_unique <- unique(all_top20)
length(all_top20_unique)
## [1] 96
all_top20_unique_df <- as.data.frame(all_top20_unique)

6. Make a phyloseq of picked taxa

# first aggregate to genus
ps_RA_nf_genus <- aggregate_rare(ps_RA_nf, level = "genus", detection = 0/140, prevalence = 0/100)

# filter by taxa name
ps_RA_nf_genus_pruned <- prune_taxa(all_top20_unique, ps_RA_nf_genus)
ps_RA_nf_genus_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
ps_genus_nf_HETAMAP <- ps_RA_nf_genus_pruned

7. Add sample_type_depth to meta

meta_nf$sample_type_depth <- NA

meta_nf$sample_type_depth[meta_nf$depth_numerical==5 & meta_nf$sample_type=="meadow"]<-"0...10_meadow"
meta_nf$sample_type_depth[meta_nf$depth_numerical==5 & meta_nf$sample_type=="organic"]<-"0...10_organic"
meta_nf$sample_type_depth[meta_nf$depth_numerical==5 & meta_nf$sample_type=="conventional"]<-"0...10_conventional"

meta_nf$sample_type_depth[meta_nf$depth_numerical==15 & meta_nf$sample_type=="meadow"]<-"10...20_meadow"
meta_nf$sample_type_depth[meta_nf$depth_numerical==15 & meta_nf$sample_type=="organic"]<-"10...20_organic"
meta_nf$sample_type_depth[meta_nf$depth_numerical==15 & meta_nf$sample_type=="conventional"]<-"10...20_conventional"

meta_nf$sample_type_depth[meta_nf$depth_numerical==25 & meta_nf$sample_type=="meadow"]<-"20...30_meadow"
meta_nf$sample_type_depth[meta_nf$depth_numerical==25 & meta_nf$sample_type=="organic"]<-"20...30_organic"
meta_nf$sample_type_depth[meta_nf$depth_numerical==25 & meta_nf$sample_type=="conventional"]<-"20...30_conventional"

meta_nf$sample_type_depth[meta_nf$depth_numerical==35 & meta_nf$sample_type=="meadow"]<-"30...40_meadow"
meta_nf$sample_type_depth[meta_nf$depth_numerical==35 & meta_nf$sample_type=="organic"]<-"30...40_organic"
meta_nf$sample_type_depth[meta_nf$depth_numerical==35 & meta_nf$sample_type=="conventional"]<-"30...40_conventional"

meta_nf$sample_type_depth[meta_nf$depth_numerical==60 & meta_nf$sample_type=="meadow"]<-"40...80_meadow"
meta_nf$sample_type_depth[meta_nf$depth_numerical==60 & meta_nf$sample_type=="organic"]<-"40...80_organic"
meta_nf$sample_type_depth[meta_nf$depth_numerical==60 & meta_nf$sample_type=="conventional"]<-"40...80_conventional"


sample_type_depth <- unique(meta_nf$sample_type_depth)
sample_type_depth
##  [1] "0...10_conventional"  "10...20_conventional" "20...30_conventional"
##  [4] "30...40_conventional" "40...80_conventional" "0...10_meadow"       
##  [7] "10...20_meadow"       "20...30_meadow"       "30...40_meadow"      
## [10] "40...80_meadow"       "0...10_organic"       "10...20_organic"     
## [13] "20...30_organic"      "30...40_organic"      "40...80_organic"
# add the new meta_nf to ps_RA_nf_genus_pruned
sample_data(ps_RA_nf_genus_pruned) <- sample_data(meta_nf)

8. Test abundance differences

lets test separately for the ones that make sense, for example, no point testing between organic 0…10 and conventional 10…20, but rather 0…10 for both soil types and then within organic between 0…10 and 10…20

NOTE! At the end of chunk I do p-value adjustment (“BH”) for all comparisons in the chunk

8.1. 0…10 all soil types

library("data.table")
library("rstatix")

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="0...10_meadow" | sample_type_depth=="0...10_conventional" | sample_type_depth=="0...10_organic")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)
# Make sample_type_depth into a factor
melt_df$sample_type_depth <- factor(melt_df$sample_type_depth)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = NULL)
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df1 <- rbindlist(pval.list)
pval.list_df1 <- as.data.frame(pval.list_df1)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df1 <- gather(pval.list_df1, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df1 <- adjust_pvalue(pval.list_df1, p.col = "p_value", output.col = "adj_p", method = "BH")

8.2. 10…20 all soil types

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="10...20_meadow" | sample_type_depth=="10...20_conventional" | sample_type_depth=="10...20_organic")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df2 <- rbindlist(pval.list)
pval.list_df2 <- as.data.frame(pval.list_df2)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df2 <- gather(pval.list_df2, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df2 <- adjust_pvalue(pval.list_df2, p.col = "p_value", output.col = "adj_p", method = "BH")

8.3. 20…30 all soil types

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="20...30_meadow" | sample_type_depth=="20...30_conventional" | sample_type_depth=="20...30_organic")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df3 <- rbindlist(pval.list)
pval.list_df3 <- as.data.frame(pval.list_df3)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df3 <- gather(pval.list_df3, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df3 <- adjust_pvalue(pval.list_df3, p.col = "p_value", output.col = "adj_p", method = "BH")

8.4. 30…40 all soil types

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="30...40_meadow" | sample_type_depth=="30...40_conventional" | sample_type_depth=="30...40_organic")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df4 <- rbindlist(pval.list)
pval.list_df4 <- as.data.frame(pval.list_df4)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df4 <- gather(pval.list_df4, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df4 <- adjust_pvalue(pval.list_df4, p.col = "p_value", output.col = "adj_p", method = "BH")

8.5. 40…80 all soil types

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="40...80_meadow" | sample_type_depth=="40...80_conventional" | sample_type_depth=="40...80_organic")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df5 <- rbindlist(pval.list)
pval.list_df5 <- as.data.frame(pval.list_df5)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df5 <- gather(pval.list_df5, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df5 <- adjust_pvalue(pval.list_df5, p.col = "p_value", output.col = "adj_p", method = "BH")

8.6. only organic 0-20

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="0...10_organic" | sample_type_depth=="10...20_organic")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 16 samples ]
## sample_data() Sample Data:       [ 16 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df6 <- rbindlist(pval.list)
pval.list_df6 <- as.data.frame(pval.list_df6)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df6 <- gather(pval.list_df6, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df6 <- adjust_pvalue(pval.list_df6, p.col = "p_value", output.col = "adj_p", method = "BH")

8.7. only organic 10-30

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="10...20_organic" | sample_type_depth=="20...30_organic")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 16 samples ]
## sample_data() Sample Data:       [ 16 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df7 <- rbindlist(pval.list)
pval.list_df7 <- as.data.frame(pval.list_df7)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df7 <- gather(pval.list_df7, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df7 <- adjust_pvalue(pval.list_df7, p.col = "p_value", output.col = "adj_p", method = "BH")

8.8. only organic 20-40

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="20...30_organic" | sample_type_depth=="30...40_organic")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 16 samples ]
## sample_data() Sample Data:       [ 16 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df8 <- rbindlist(pval.list)
pval.list_df8 <- as.data.frame(pval.list_df8)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df8 <- gather(pval.list_df8, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df8 <- adjust_pvalue(pval.list_df8, p.col = "p_value", output.col = "adj_p", method = "BH")

8.9. only organic 30-80

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="30...40_organic" | sample_type_depth=="40...80_organic")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 16 samples ]
## sample_data() Sample Data:       [ 16 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df9 <- rbindlist(pval.list)
pval.list_df9 <- as.data.frame(pval.list_df9)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df9 <- gather(pval.list_df9, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df9 <- adjust_pvalue(pval.list_df9, p.col = "p_value", output.col = "adj_p", method = "BH")

8.10. only conventional 0-20

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="0...10_conventional" | sample_type_depth=="10...20_conventional")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 18 samples ]
## sample_data() Sample Data:       [ 18 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df10 <- rbindlist(pval.list)
pval.list_df10 <- as.data.frame(pval.list_df10)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df10 <- gather(pval.list_df10, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df10 <- adjust_pvalue(pval.list_df10, p.col = "p_value", output.col = "adj_p", method = "BH")

8.11. only conventional 10-30

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="10...20_conventional" | sample_type_depth=="20...30_conventional")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 18 samples ]
## sample_data() Sample Data:       [ 18 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df11 <- rbindlist(pval.list)
pval.list_df11 <- as.data.frame(pval.list_df11)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df11 <- gather(pval.list_df11, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df11 <- adjust_pvalue(pval.list_df11, p.col = "p_value", output.col = "adj_p", method = "BH")

8.12. only conventional 20-40

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="20...30_conventional" | sample_type_depth=="30...40_conventional")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 18 samples ]
## sample_data() Sample Data:       [ 18 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df12 <- rbindlist(pval.list)
pval.list_df12 <- as.data.frame(pval.list_df12)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df12 <- gather(pval.list_df12, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df12 <- adjust_pvalue(pval.list_df12, p.col = "p_value", output.col = "adj_p", method = "BH")

8.13. only conventional 30-80

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="30...40_conventional" | sample_type_depth=="40...80_conventional")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 18 samples ]
## sample_data() Sample Data:       [ 18 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df13 <- rbindlist(pval.list)
pval.list_df13 <- as.data.frame(pval.list_df13)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df13 <- gather(pval.list_df13, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df13 <- adjust_pvalue(pval.list_df13, p.col = "p_value", output.col = "adj_p", method = "BH")

8.14. only meadow 0-20

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="0...10_meadow" | sample_type_depth=="10...20_meadow")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 16 samples ]
## sample_data() Sample Data:       [ 16 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df14 <- rbindlist(pval.list)
pval.list_df14 <- as.data.frame(pval.list_df14)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df14 <- gather(pval.list_df14, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df14 <- adjust_pvalue(pval.list_df14, p.col = "p_value", output.col = "adj_p", method = "BH")

8.15. only meadow 10-30

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="10...20_meadow" | sample_type_depth=="20...30_meadow")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 16 samples ]
## sample_data() Sample Data:       [ 16 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df15 <- rbindlist(pval.list)
pval.list_df15 <- as.data.frame(pval.list_df15)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df15 <- gather(pval.list_df15, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df15 <- adjust_pvalue(pval.list_df15, p.col = "p_value", output.col = "adj_p", method = "BH")

8.16. only meadow 20-40

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="20...30_meadow" | sample_type_depth=="30...40_meadow")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 16 samples ]
## sample_data() Sample Data:       [ 16 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df16 <- rbindlist(pval.list)
pval.list_df16 <- as.data.frame(pval.list_df16)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df16 <- gather(pval.list_df16, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df16 <- adjust_pvalue(pval.list_df16, p.col = "p_value", output.col = "adj_p", method = "BH")

8.17. only meadow 30-80

subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type_depth=="30...40_meadow" | sample_type_depth=="40...80_meadow")
subset
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 16 samples ]
## sample_data() Sample Data:       [ 16 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
#create data table
melt_df <-  psmelt(subset)

pval.list <- list()

for (i in all_top20_unique)
{
  # data.frame with the selected taxonomic group
  df <- filter(melt_df, genus==i)
  x <- pairwise.wilcox.test(df$Abundance, df$sample_type_depth,
                            p.adjust.method = "BH")
  x <- as.data.frame(x[["p.value"]])
  x$genus <- i
  x$comparison <- rownames(x)
  pval.list[[i]] <- x
}

pval.list_df17 <- rbindlist(pval.list)
pval.list_df17 <- as.data.frame(pval.list_df17)
# make into long format: where the new column  called "vs" contains the sample sample_type_depth comparison, and new column "p_value" contains the p_values
pval.list_df17 <- gather(pval.list_df17, vs, p_value, 1, factor_key=TRUE)
# adjust p values
pval.list_df17 <- adjust_pvalue(pval.list_df17, p.col = "p_value", output.col = "adj_p", method = "BH")

11. combine all p-values

all.pvals <- rbind(pval.list_df1, pval.list_df2, pval.list_df3, pval.list_df4, pval.list_df5, pval.list_df6, pval.list_df7, pval.list_df8, pval.list_df9, pval.list_df10, pval.list_df11, pval.list_df12, pval.list_df13, pval.list_df14, pval.list_df14, pval.list_df16, pval.list_df17)

all.sig.pvals <- all.pvals[all.pvals$adj_p <= 0.05, ]   

all.sig.genus <- unique(all.sig.pvals$genus)
length(all.sig.genus)
## [1] 70

12. Make ps with the significant taxa and get taxa mean and se values

# only keep sig

# filter by taxa name
ps_RA_nf_genus_pruned_sig <- prune_taxa(all.sig.genus, ps_RA_nf_genus_pruned)
ps_Heatmap <- ps_RA_nf_genus_pruned_sig
ps_Heatmap
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 69 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 29 sample variables ]
## tax_table()   Taxonomy Table:    [ 69 taxa by 1 taxonomic ranks ]
rm(ps_RA_nf_genus_pruned_sig)

df <- psmelt(ps_Heatmap)

x <- df %>%
  group_by(OTU, sample_type, depth) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

x
## # A tibble: 1,035 × 5
## # Groups:   OTU, sample_type [207]
##    OTU        sample_type depth       mean       se
##    <chr>      <fct>       <chr>      <dbl>    <dbl>
##  1 Acremonium meadow      0...10  0.00996  0.00774 
##  2 Acremonium meadow      10...20 0.00372  0.000836
##  3 Acremonium meadow      20...30 0.00656  0.00243 
##  4 Acremonium meadow      30...40 0.00759  0.00494 
##  5 Acremonium meadow      40...   0.000577 0.000564
##  6 Acremonium organic     0...10  0.00551  0.00103 
##  7 Acremonium organic     10...20 0.0158   0.00834 
##  8 Acremonium organic     20...30 0.0157   0.00325 
##  9 Acremonium organic     30...40 0.0105   0.00452 
## 10 Acremonium organic     40...   0.0713   0.0538  
## # ℹ 1,025 more rows

13. MAKE HEATMAP

13.1. Add FUNGuild annotation for HEATMAP

13.1.1. Build FUNGuild

I need to make a separate funguild phyloseq for heatmap, where I have only the genus and higher level annotation (no species level)

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

FG <- read.csv2("FUNGuild_31_05_2024.csv")

# what different levels there are:
unique(FG$taxonomicLevel)
## [1] "Genus"      "Species"    "Variety"    "Family"     "Order"     
## [6] "Phylum"     "Form"       "Subspecies"

I will get the annotations from genus and higher tax levels:

Genus

fg <- FG[FG$taxonomicLevel == "Genus", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "genus"
FG_tax_table <- merge(tax_table, fg, by = "genus", all.x = TRUE)


# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
# and OTU as row names

FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_gen"
colnames(FG_tax_table)[2] <- "guild_gen"

# save with new name
FUNGuild_gen <- FG_tax_table

Family

fg <- FG[FG$taxonomicLevel == "Family", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "family"
FG_tax_table <- merge(tax_table, fg, by = "family", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_fam"
colnames(FG_tax_table)[2] <- "guild_fam"

# save with new name
FUNGuild_fam <- FG_tax_table

Order

fg <- FG[FG$taxonomicLevel == "Order", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "order"
FG_tax_table <- merge(tax_table, fg, by = "order", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_ord"
colnames(FG_tax_table)[2] <- "guild_ord"

# save with new name
FUNGuild_ord <- FG_tax_table

Phylum

fg <- FG[FG$taxonomicLevel == "Phylum", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "phylum"
FG_tax_table <- merge(tax_table, fg, by = "phylum", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_phy"
colnames(FG_tax_table)[2] <- "guild_phy"

# save with new name
FUNGuild_phy <- FG_tax_table

Combine all annotations:Genus Family Order Phylum

x <- left_join(rownames_to_column(FUNGuild_gen), rownames_to_column(FUNGuild_fam), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_ord), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_phy), by = "rowname")
# get the value from another column if NA
y <- x %>%
  mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_fam))

# get the value from another column if NA
y <- y %>%
  mutate(guild_gen = coalesce(guild_gen,guild_fam))

# same for order
# get the value from another column if NA
y <- y %>%
  mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_ord))

# get the value from another column if NA
y <- y %>%
  mutate(guild_gen = coalesce(guild_gen,guild_ord))

# same for phylum
# get the value from another column if NA
y <- y %>%
  mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_phy))

# get the value from another column if NA
y <- y %>%
  mutate(guild_gen = coalesce(guild_gen,guild_phy))

# then rename the gen columns
colnames(y)[2] <- "trophicMode"
colnames(y)[3] <- "guild"


# remove the rest of the columns
y <- y[, -c(4:11)]

# rownames
y2 <- y[,-1]
rownames(y2) <- y[,1]
# Remove empty spaces and remove also "|"

y2$trophicMode <- gsub(" ", "", y2$trophicMode, fixed = TRUE)
y2$guild <- gsub("|", "", y2$guild, fixed = TRUE)
y3 <- left_join(rownames_to_column(y2), rownames_to_column(tax_table), by = "rowname")
row.names(y3) <- y3$rowname
y3 <- y3[, -1]

Define AMFs, Ectomycorrhizal and Plant pathogens

Here in FUNGuild column:

  • Ectomycorrhizal = guilds containing “Ectomycorrhizal” from trophic mode Symbiotroph only, NOTE! this is same as pure Ectomycorrhizal!!
  • Arbuscular Mycorrhizal = all guilds containing “Arbuscular Mycorrhizal” from trophic mode Symbiotroph (no AMF in other trophic modes)
  • Endophyte = Pure endophytes from trophic mode Symbiotroph only
  • Plant Pathogen = Pure Plant Pathogens from trophic mode Pathotroph only
z <- y3

z <- z %>%  
   mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
z <- z %>%
  mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" |  FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" |  FG != "Arbuscular Mycorrhizal" |  FG != "Endophyte" | FG != "Plant Pathogen" ~ z$trophicMode))
FG_tax_table <- z

# remove the FG column

FG_tax_table <- FG_tax_table[, -c(11)]

Check the different written forms, if I have empty spaces?

unique(FG_tax_table$trophicMode)
##  [1] "Saprotroph"                        NA                                 
##  [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
##  [5] "Pathotroph"                        "Symbiotroph"                      
##  [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"           
##  [9] "Saprotroph-Saprotroph-Symbiotroph" "Pathotroph-Pathotroph-Saprotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
##  [1] "Saprotroph"                        NA                                 
##  [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
##  [5] "Plant Pathogen"                    "Ectomycorrhizal"                  
##  [7] "Arbuscular Mycorrhizal"            "Pathotroph"                       
##  [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"           
## [11] "Symbiotroph"                       "Saprotroph-Saprotroph-Symbiotroph"
## [13] "Endophyte"                         "Pathotroph-Pathotroph-Saprotroph"
# change some names for FUNGuild
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"

# and for trophicMode
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"

# reorder
FG_tax_table <- FG_tax_table[, c(1:2, 11, 3:10)]

Check again

unique(FG_tax_table$trophicMode)
## [1] "Saprotroph"                        NA                                 
## [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
## [5] "Pathotroph"                        "Symbiotroph"                      
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
##  [1] "Saprotroph"                        NA                                 
##  [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
##  [5] "Plant Pathogen"                    "Ectomycorrhizal"                  
##  [7] "Arbuscular Mycorrhizal"            "Other Pathotroph"                 
##  [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"           
## [11] "Other Symbiotroph"                 "Endophyte"

13.1.2. Make ps_FG for HEATMAP

ps_FG_HEATMAP_no_clusters <- phyloseq(otu_table(ps), tax_table(as.matrix(FG_tax_table)), sample_data(ps))
ps_FG_HEATMAP_no_clusters
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]

13.1.3. Make FIGURE

ps_RA <- transform(ps, "compositional")

# Pick relative abundances (compositional) and sample metadata 

ps_RA_nf <- subset_samples(ps_RA, sample_type!="forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_nf),
              MARGIN = ifelse(taxa_are_rows(ps_RA_nf), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_nf = prune_taxa((prev0 > 0), ps_RA_nf)
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 19820 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 19820 taxa by 7 taxonomic ranks ]
all.sig.genus <- unique(all.sig.pvals$genus)
length(all.sig.genus)
## [1] 70
# first aggregate to genus
ps_RA_nf_genus <- aggregate_rare(ps_RA_nf, level = "genus", detection = 0/140, prevalence = 0/100)

all_top20_unique <- all_top20_unique_df$all_top20_unique

# filter by taxa name
ps_RA_nf_genus_pruned <- prune_taxa(all_top20_unique, ps_RA_nf_genus)
ps_RA_nf_genus_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 96 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 96 taxa by 1 taxonomic ranks ]
# filter by taxa name
ps_RA_nf_genus_pruned_sig <- prune_taxa(all.sig.genus, ps_RA_nf_genus_pruned)
ps_Heatmap <- ps_RA_nf_genus_pruned_sig
ps_Heatmap
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 69 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 69 taxa by 1 taxonomic ranks ]
FG_tax <- as.data.frame(as.matrix(tax_table(ps_FG_HEATMAP_no_clusters)))

13.1.5.Transform RA and z scaled

ps_genus <- aggregate_rare(ps, level = 'genus', detection = 0/100, prevalence = 0/100)
ps_genus
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 943 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 943 taxa by 1 taxonomic ranks ]
# subset
ps_genus_NO_FOREST <- subset_samples(ps_genus, sample_type!="forest")
ps_genus_NO_FOREST
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 943 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 943 taxa by 1 taxonomic ranks ]
meta_nf <- meta(ps_genus_NO_FOREST)

meta_nf$Depth <- gsub("...", "-", meta_nf$depth, fixed = TRUE)

meta_nf$Depth[meta_nf$Depth=="40-"] <- "40-80"


meta_nf$Depth[meta_nf$Depth=="0-10"] <- "0-10 cm"
meta_nf$Depth[meta_nf$Depth=="10-20"] <- "10-20 cm"
meta_nf$Depth[meta_nf$Depth=="20-30"] <- "20-30 cm"
meta_nf$Depth[meta_nf$Depth=="30-40"] <- "30-40 cm"
meta_nf$Depth[meta_nf$Depth=="40-80"] <- "40-80 cm"

# RA-transform
ps_genus_NO_FOREST_RA <- microbiome::transform(ps_genus_NO_FOREST, 'compositional')

# Z-transform
ps_genus_NO_FOREST_RA_z <- microbiome::transform(ps_genus_NO_FOREST_RA, 'Z', log10 = FALSE)

# subset to only the significant genus from above

hetamap.taxa <- taxa_names(ps_Heatmap)
data <- as(otu_table(ps_genus_NO_FOREST_RA_z), "matrix")
data <- as.data.frame(data)
data_subset <- data[(row.names(data) %in% hetamap.taxa),]

data_subset <- as.matrix(data_subset)

# add annotations "Depth" and "Treatment"
my_sample_col <- data.frame(meta_nf[c("Depth", "sample_type")], row.names = row.names(meta_nf))
colnames(my_sample_col) <- c("Depth", "Treatment")


x <- as.data.frame(t(data_subset))
y <- as.data.frame(my_sample_col)
colnames(y) <- c("Depth", "sample_type")
z <- dplyr::left_join(rownames_to_column(x), rownames_to_column(y), by=c("rowname" = "rowname"))
colnames(z)[1] <- "ID"

f <- z %>%
  group_by(sample_type, Depth) %>%
  summarise_all(mean)

# remove ID column
f <- f[ , -3]

# make new ID column, which is the soiltypedepth
library(stringr)
f$ID <- str_c(f$sample_type, '_', f$Depth)
# make new df with just sample type, depth and sampletypedepth

df <- f[c("sample_type", "Depth", "ID")]
# remove extra columns from f
f2 <- f[, -c(1, 2, 72)]
# column into rownames
rownames(f2) <- f$ID
# make into numeric matrix
f3 <- data.matrix(f2, rownames.force = NA)
f4 <- t(f3)

# same for df
# remove extra columns from df
df2 <- df[ , -c(3)]
# column into rownames
rownames(df2) <- df$ID
my_sample_col2 <- df2
my_sample_col3 <- as.data.frame(my_sample_col2)
rownames(my_sample_col3) <- df$ID
# lets add annotations of samples

# add annotations "Depth" and "soil_type" and change order
colnames(my_sample_col3) <- c("Treatment", "Depth")
my_sample_col3 <- my_sample_col3[, c(2,1)]

13.1.6 Finally plot HEATMAP

library("pheatmap")
library("ggplotify")

# only keep the genus and FUNGuild
x <- FG_tax[, c(9, 3)]
# Remove duplicates by single column
FUNGuild_tax_table <- x[!duplicated(x$genus), ]
dim(FUNGuild_tax_table)
## [1] 943   2
# and genus as row names
rownames(FUNGuild_tax_table) <- NULL
FUNGuild_tax_table <- column_to_rownames(FUNGuild_tax_table, var = "genus")

# view data frame
unique(FUNGuild_tax_table$FUNGuild)
##  [1] NA                                  "Saprotroph-Symbiotroph"           
##  [3] "Arbuscular Mycorrhizal"            "Pathotroph-Saprotroph-Symbiotroph"
##  [5] "Pathotroph-Saprotroph"             "Saprotroph"                       
##  [7] "Endophyte"                         "Ectomycorrhizal"                  
##  [9] "Other Pathotroph"                  "Plant Pathogen"                   
## [11] "Other Symbiotroph"                 "Pathotroph-Symbiotroph"
FUNGuild_tax_table$FUNGuild <- as.factor(FUNGuild_tax_table$FUNGuild)

# change level order
FUNGuild_tax_table$FUNGuild <- factor(FUNGuild_tax_table$FUNGuild, levels = c("Plant Pathogen", "Other Pathotroph", "Pathotroph-Saprotroph", "Pathotroph-Saprotroph-Symbiotroph", "Pathotroph-Symbiotroph", "Saprotroph", "Saprotroph-Symbiotroph", "Other Symbiotroph", "Ectomycorrhizal", "Endophyte", "Arbuscular Mycorrhizal"))
levels(FUNGuild_tax_table$FUNGuild)
##  [1] "Plant Pathogen"                    "Other Pathotroph"                 
##  [3] "Pathotroph-Saprotroph"             "Pathotroph-Saprotroph-Symbiotroph"
##  [5] "Pathotroph-Symbiotroph"            "Saprotroph"                       
##  [7] "Saprotroph-Symbiotroph"            "Other Symbiotroph"                
##  [9] "Ectomycorrhizal"                   "Endophyte"                        
## [11] "Arbuscular Mycorrhizal"
#Create color palette

my_colour = list(
  "Treatment" = c(meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c"),
  Depth = c('0-10 cm' = "#387212", '10-20 cm' = "#ADC476", '20-30 cm' = "#D8D2BA",'30-40 cm' = "#907852", '40-80 cm' = "#6A4C3A"),
  FUNGuild = c("Plant Pathogen" = "deeppink", "Other Pathotroph" = "#d6849a", "Pathotroph-Saprotroph" = "#e3adbc", "Pathotroph-Saprotroph-Symbiotroph" = "#f1d6dd", "Pathotroph-Symbiotroph" = "#faf1f4", "Saprotroph" = "#CBBEAD", "Saprotroph-Symbiotroph" = "darkseagreen", "Other Symbiotroph" = "lightgreen", "Ectomycorrhizal" = "darkgreen", "Endophyte" = "#A2CF31", "Arbuscular Mycorrhizal" = "darkolivegreen1"))

# add gap between sample_types
gp_col = which(diff(as.numeric(factor(my_sample_col3$Treatment)))!=0)

p2 <- as.ggplot(function() pheatmap(f4, cluster_cols = FALSE, cluster_rows = TRUE, annotation_col = my_sample_col3, annotation_colors = my_colour, gaps_col = gp_col, color=colorRampPalette(c("navy", "white", "red"))(50), show_colnames = FALSE, legend = TRUE, annotation_row = FUNGuild_tax_table, border_color = NA, cellheight = 16, fontsize = 14, fontsize_row = 14, annotation_names_row = FALSE, annotation_names_col = FALSE))

p2

B) HEATMAP for FOREST ONLY fungal genera no filtering by significant genera

1. load packages and data

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")


setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata 
ps_RA <- microbiome::transform(ps, "compositional")
ps_RA_nf <- subset_samples(ps_RA, sample_type=="forest")

# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_nf),
              MARGIN = ifelse(taxa_are_rows(ps_RA_nf), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_nf = prune_taxa((prev0 > 0), ps_RA_nf)
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 5398 taxa and 15 samples ]
## sample_data() Sample Data:       [ 15 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 5398 taxa by 7 taxonomic ranks ]
meta_nf <- meta(ps_RA_nf)

2. Overview of the heatmap analysis

I will use RA values For the final heatmap figure the RA values will be standardizes (z-transformed)

  1. First pick the 10 most abundant genera in each soil type depth
  2. Make a HEATMAP of the most abundant taxa only

3. at genus level

ps_genus <- aggregate_rare(ps, level = 'genus', detection = 0/100, prevalence = 0/140)
ps_genus
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 943 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 943 taxa by 1 taxonomic ranks ]
ps_genus_RA <- microbiome::transform(ps_genus, "compositional")

ps_genus_RA_pruned <- subset_samples(ps_genus_RA, sample_type=="forest")
ps_genus_RA_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 943 taxa and 15 samples ]
## sample_data() Sample Data:       [ 15 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 943 taxa by 1 taxonomic ranks ]

4. Pick 10 most abund taxa in each forest soil layer

# sample wise filtering according to most abund. genera

# Initialize an empty list to store the taxa
abund.taxa <- list()

for (i in meta_nf$sample_type) {
  for (j in meta_nf$depth) {
    x <- sample_names(sample_data(ps_genus_RA_pruned)[sample_data(ps_genus_RA_pruned)$sample_type == i & sample_data(ps_genus_RA_pruned)$depth == j,])

# Calculate taxa mean of the selected samples
top10 <- head(sort(rowMeans(otu_table(ps_genus_RA_pruned)[,x]), decreasing = TRUE), 10)

result_name <- paste("sample_type", i, "depth", j, sep = "_")

abund.taxa[[result_name]] <- top10
  }
}

5. Combine picked taxa

management_layer <- c(names(abund.taxa))

all_top10 <- c()

for (i in management_layer) {
  top10 <- c(names(abund.taxa[[i]]))
  all_top10 <- c(all_top10, top10)

}

all_top10_unique <- unique(all_top10)
length(all_top10_unique)
## [1] 31

6. Make a phyloseq of picked taxa

# first aggregate to genus
ps_RA_nf_genus <- aggregate_rare(ps_RA_nf, level = "genus", detection = 0/140, prevalence = 0/100)

# filter by taxa name
ps_RA_nf_genus_pruned <- prune_taxa(all_top10_unique, ps_RA_nf_genus)
ps_RA_nf_genus_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 31 taxa and 15 samples ]
## sample_data() Sample Data:       [ 15 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 31 taxa by 1 taxonomic ranks ]
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

ps_genus_FOREST_ONLY_HETAMAP <- ps_RA_nf_genus_pruned
#save(ps_genus_FOREST_ONLY_HETAMAP, file='ps_genus_nf_HETAMAP_all_top10_FOREST_ONLY_unclassified_included')

9. Get taxa mean and se values

ps_Heatmap <- ps_genus_FOREST_ONLY_HETAMAP
ps_Heatmap
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 31 taxa and 15 samples ]
## sample_data() Sample Data:       [ 15 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 31 taxa by 1 taxonomic ranks ]
df <- psmelt(ps_Heatmap)

x <- df %>%
  group_by(OTU, sample_type, depth) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

x
## # A tibble: 155 × 5
## # Groups:   OTU, sample_type [31]
##    OTU                     sample_type depth        mean        se
##    <chr>                   <fct>       <chr>       <dbl>     <dbl>
##  1 Ascomycota_unclassified forest      0...10  0.0204    0.00318  
##  2 Ascomycota_unclassified forest      10...20 0.0206    0.00656  
##  3 Ascomycota_unclassified forest      20...30 0.0245    0.0166   
##  4 Ascomycota_unclassified forest      30...40 0.00872   0.00634  
##  5 Ascomycota_unclassified forest      40...   0.0458    0.0458   
##  6 Botrytis                forest      0...10  0.0000325 0.0000325
##  7 Botrytis                forest      10...20 0.0000800 0.0000800
##  8 Botrytis                forest      20...30 0         0        
##  9 Botrytis                forest      30...40 0         0        
## 10 Botrytis                forest      40...   0.163     0.163    
## # ℹ 145 more rows

13. MAKE HEATMAP

13.1. Add FUNGuild annotation for HEATMAP

13.1.1. Build FUNGuild

I need to make a separate funguild phyloseq for heatmap, where I have only the genus and higher level annotation (no species level)

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

FG <- read.csv2("FUNGuild_31_05_2024.csv")

# what different levels there are:
unique(FG$taxonomicLevel)
## [1] "Genus"      "Species"    "Variety"    "Family"     "Order"     
## [6] "Phylum"     "Form"       "Subspecies"

I will get the annotations from genus and higher tax levels:

Genus

fg <- FG[FG$taxonomicLevel == "Genus", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "genus"
FG_tax_table <- merge(tax_table, fg, by = "genus", all.x = TRUE)


# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
# and OTU as row names

FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_gen"
colnames(FG_tax_table)[2] <- "guild_gen"

# save with new name
FUNGuild_gen <- FG_tax_table

Family

fg <- FG[FG$taxonomicLevel == "Family", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "family"
FG_tax_table <- merge(tax_table, fg, by = "family", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_fam"
colnames(FG_tax_table)[2] <- "guild_fam"

# save with new name
FUNGuild_fam <- FG_tax_table

Order

fg <- FG[FG$taxonomicLevel == "Order", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "order"
FG_tax_table <- merge(tax_table, fg, by = "order", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_ord"
colnames(FG_tax_table)[2] <- "guild_ord"

# save with new name
FUNGuild_ord <- FG_tax_table

Phylum

fg <- FG[FG$taxonomicLevel == "Phylum", ]      
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)

# merge tables
colnames(fg)[1] <- "phylum"
FG_tax_table <- merge(tax_table, fg, by = "phylum", all.x = TRUE)

# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA

# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]

# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")

# change column names
colnames(FG_tax_table)[1] <- "trophicMode_phy"
colnames(FG_tax_table)[2] <- "guild_phy"

# save with new name
FUNGuild_phy <- FG_tax_table

Combine all annotations:Genus Family Order Phylum

x <- left_join(rownames_to_column(FUNGuild_gen), rownames_to_column(FUNGuild_fam), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_ord), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_phy), by = "rowname")
# get the value from another column if NA
y <- x %>%
  mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_fam))

# get the value from another column if NA
y <- y %>%
  mutate(guild_gen = coalesce(guild_gen,guild_fam))

# same for order
# get the value from another column if NA
y <- y %>%
  mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_ord))

# get the value from another column if NA
y <- y %>%
  mutate(guild_gen = coalesce(guild_gen,guild_ord))

# same for phylum
# get the value from another column if NA
y <- y %>%
  mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_phy))

# get the value from another column if NA
y <- y %>%
  mutate(guild_gen = coalesce(guild_gen,guild_phy))

# then rename the gen columns
colnames(y)[2] <- "trophicMode"
colnames(y)[3] <- "guild"


# remove the rest of the columns
y <- y[, -c(4:11)]

# rownames
y2 <- y[,-1]
rownames(y2) <- y[,1]
# Remove empty spaces and remove also "|"

y2$trophicMode <- gsub(" ", "", y2$trophicMode, fixed = TRUE)
y2$guild <- gsub("|", "", y2$guild, fixed = TRUE)
y3 <- left_join(rownames_to_column(y2), rownames_to_column(tax_table), by = "rowname")
row.names(y3) <- y3$rowname
y3 <- y3[, -1]

Define AMFs, Ectomycorrhizal and Plant pathogens

Here in FUNGuild column:

  • Ectomycorrhizal = guilds containing “Ectomycorrhizal” from trophic mode Symbiotroph only, NOTE! this is same as pure Ectomycorrhizal!!
  • Arbuscular Mycorrhizal = all guilds containing “Arbuscular Mycorrhizal” from trophic mode Symbiotroph (no AMF in other trophic modes)
  • Endophyte = Pure endophytes from trophic mode Symbiotroph only
  • Plant Pathogen = Pure Plant Pathogens from trophic mode Pathotroph only
z <- y3

z <- z %>%  
   mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
z <- z %>%
  mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" |  FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" |  FG != "Arbuscular Mycorrhizal" |  FG != "Endophyte" | FG != "Plant Pathogen" ~ z$trophicMode))
FG_tax_table <- z

# remove the FG column

FG_tax_table <- FG_tax_table[, -c(11)]

Check the different written forms, if I have empty spaces?

unique(FG_tax_table$trophicMode)
##  [1] "Saprotroph"                        NA                                 
##  [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
##  [5] "Pathotroph"                        "Symbiotroph"                      
##  [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"           
##  [9] "Saprotroph-Saprotroph-Symbiotroph" "Pathotroph-Pathotroph-Saprotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
##  [1] "Saprotroph"                        NA                                 
##  [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
##  [5] "Plant Pathogen"                    "Ectomycorrhizal"                  
##  [7] "Arbuscular Mycorrhizal"            "Pathotroph"                       
##  [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"           
## [11] "Symbiotroph"                       "Saprotroph-Saprotroph-Symbiotroph"
## [13] "Endophyte"                         "Pathotroph-Pathotroph-Saprotroph"
# change some names for FUNGuild
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"

# and for trophicMode
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"

# reorder
FG_tax_table <- FG_tax_table[, c(1:2, 11, 3:10)]

Check again

unique(FG_tax_table$trophicMode)
## [1] "Saprotroph"                        NA                                 
## [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
## [5] "Pathotroph"                        "Symbiotroph"                      
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
##  [1] "Saprotroph"                        NA                                 
##  [3] "Pathotroph-Saprotroph"             "Saprotroph-Symbiotroph"           
##  [5] "Plant Pathogen"                    "Ectomycorrhizal"                  
##  [7] "Arbuscular Mycorrhizal"            "Other Pathotroph"                 
##  [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"           
## [11] "Other Symbiotroph"                 "Endophyte"

13.1.2. Save ps_FG for HEATMAP

ps_FG_HEATMAP_no_clusters <- phyloseq(otu_table(ps), tax_table(as.matrix(FG_tax_table)), sample_data(ps))
ps_FG_HEATMAP_no_clusters
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]

13.1.3.

FG_tax <- as.data.frame(as.matrix(tax_table(ps_FG_HEATMAP_no_clusters)))

13.1.5.Transform RA and z scaled

ps_genus <- aggregate_rare(ps, level = 'genus', detection = 0/100, prevalence = 0/100)
ps_genus
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 943 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 943 taxa by 1 taxonomic ranks ]
# subset
ps_genus_NO_FOREST <- subset_samples(ps_genus, sample_type=="forest")
ps_genus_NO_FOREST
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 943 taxa and 15 samples ]
## sample_data() Sample Data:       [ 15 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 943 taxa by 1 taxonomic ranks ]
meta_nf <- meta(ps_genus_NO_FOREST)

meta_nf$Depth <- gsub("...", "-", meta_nf$depth, fixed = TRUE)

meta_nf$Depth[meta_nf$Depth=="40-"] <- "40-80"


meta_nf$Depth[meta_nf$Depth=="0-10"] <- "0-10 cm"
meta_nf$Depth[meta_nf$Depth=="10-20"] <- "10-20 cm"
meta_nf$Depth[meta_nf$Depth=="20-30"] <- "20-30 cm"
meta_nf$Depth[meta_nf$Depth=="30-40"] <- "30-40 cm"
meta_nf$Depth[meta_nf$Depth=="40-80"] <- "40-80 cm"

# RA-transform
ps_genus_NO_FOREST_RA <- microbiome::transform(ps_genus_NO_FOREST, 'compositional')

# Z-transform
ps_genus_NO_FOREST_RA_z <- microbiome::transform(ps_genus_NO_FOREST_RA, 'Z', log10 = FALSE)

# subset to only the 30 genera from above

hetamap.taxa <- taxa_names(ps_Heatmap)
data <- as(otu_table(ps_genus_NO_FOREST_RA_z), "matrix")
data <- as.data.frame(data)
data_subset <- data[(row.names(data) %in% hetamap.taxa),]
# now only 30 genuses, as should be
data_subset <- as.matrix(data_subset)

# add annotations "Depth" and "Treatment"
my_sample_col <- data.frame(meta_nf[c("Depth", "sample_type")], row.names = row.names(meta_nf))
colnames(my_sample_col) <- c("Depth", "Treatment")


x <- as.data.frame(t(data_subset))
y <- as.data.frame(my_sample_col)
colnames(y) <- c("Depth", "sample_type")
z <- dplyr::left_join(rownames_to_column(x), rownames_to_column(y), by=c("rowname" = "rowname"))
colnames(z)[1] <- "ID"

f <- z %>%
  group_by(sample_type, Depth) %>%
  summarise_all(mean)

# remove ID column
f <- f[ , -3]

# make new ID column, which is the soiltypedepth
library(stringr)
f$ID <- str_c(f$sample_type, '_', f$Depth)
# make new df with just sample type, Depth and sampletypedepth

df <- f[c("sample_type", "Depth", "ID")]
# remove extra columns from f
f2 <- f[, -c(1, 2, 34)]
# column into rownames
rownames(f2) <- f$ID
# make into numeric matrix
f3 <- data.matrix(f2, rownames.force = NA)
f4 <- t(f3)

# same for df
# remove extra columns from df
df2 <- df[ , -c(3)]
# column into rownames
rownames(df2) <- df$ID
my_sample_col2 <- df2
my_sample_col3 <- as.data.frame(my_sample_col2)
rownames(my_sample_col3) <- df$ID
# lets add annotations of samples

# add annotations "Depth" and "soil_type" and change order
colnames(my_sample_col3) <- c("Treatment", "Depth")
my_sample_col3 <- my_sample_col3[, c(2,1)]

13.1.6 Finally plot HEATMAP

library("pheatmap")

# only keep the genus and FUNGuild
x <- FG_tax[, c(9, 3)]
# Remove duplicates by single column
FUNGuild_tax_table <- x[!duplicated(x$genus), ]
dim(FUNGuild_tax_table)
## [1] 943   2
# and genus as row names
rownames(FUNGuild_tax_table) <- NULL
FUNGuild_tax_table <- column_to_rownames(FUNGuild_tax_table, var = "genus")

# view data frame
unique(FUNGuild_tax_table$FUNGuild)
##  [1] NA                                  "Saprotroph-Symbiotroph"           
##  [3] "Arbuscular Mycorrhizal"            "Pathotroph-Saprotroph-Symbiotroph"
##  [5] "Pathotroph-Saprotroph"             "Saprotroph"                       
##  [7] "Endophyte"                         "Ectomycorrhizal"                  
##  [9] "Other Pathotroph"                  "Plant Pathogen"                   
## [11] "Other Symbiotroph"                 "Pathotroph-Symbiotroph"
FUNGuild_tax_table$FUNGuild <- as.factor(FUNGuild_tax_table$FUNGuild)

# change level order
FUNGuild_tax_table$FUNGuild <- factor(FUNGuild_tax_table$FUNGuild, levels = c("Plant Pathogen", "Other Pathotroph", "Pathotroph-Saprotroph", "Pathotroph-Saprotroph-Symbiotroph", "Pathotroph-Symbiotroph", "Saprotroph", "Saprotroph-Symbiotroph", "Other Symbiotroph", "Ectomycorrhizal", "Endophyte", "Arbuscular Mycorrhizal"))
levels(FUNGuild_tax_table$FUNGuild)
##  [1] "Plant Pathogen"                    "Other Pathotroph"                 
##  [3] "Pathotroph-Saprotroph"             "Pathotroph-Saprotroph-Symbiotroph"
##  [5] "Pathotroph-Symbiotroph"            "Saprotroph"                       
##  [7] "Saprotroph-Symbiotroph"            "Other Symbiotroph"                
##  [9] "Ectomycorrhizal"                   "Endophyte"                        
## [11] "Arbuscular Mycorrhizal"
#Create color palette

my_colour = list(
  "Treatment" = c(forest = "#1167b1"),
  Depth = c('0-10 cm' = "#387212", '10-20 cm' = "#ADC476", '20-30 cm' = "#D8D2BA",'30-40 cm' = "#907852", '40-80 cm' = "#6A4C3A"),
  FUNGuild = c("Plant Pathogen" = "deeppink", "Other Pathotroph" = "#d6849a", "Pathotroph-Saprotroph" = "#e3adbc", "Pathotroph-Saprotroph-Symbiotroph" = "#f1d6dd", "Pathotroph-Symbiotroph" = "#faf1f4", "Saprotroph" = "#CBBEAD", "Saprotroph-Symbiotroph" = "darkseagreen", "Other Symbiotroph" = "lightgreen", "Ectomycorrhizal" = "darkgreen", "Endophyte" = "#A2CF31", "Arbuscular Mycorrhizal" = "darkolivegreen1"))

p3 <- as.ggplot(function() pheatmap(f4, cluster_cols = FALSE, cluster_rows = TRUE, annotation_col = my_sample_col3, annotation_colors = my_colour, color=colorRampPalette(c("navy", "white", "red"))(50), show_colnames = FALSE, legend = TRUE, annotation_row = FUNGuild_tax_table, border_color = NA, cellheight = 16, fontsize = 14, fontsize_row = 14, annotation_names_row = FALSE, annotation_names_col = FALSE))

p3

C) COMPOSTION PLOTS

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)

library(metagMisc)
library(pheatmap)
library(metagMisc)
library(RColorBrewer)
library(viridis)
library(tidyverse)
library(ggpubr)

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata 
ps_RA <- microbiome::transform(ps, "compositional")

load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]

1. FUNGuild composition

Note, due to reviewers comment, I will redo the Functional guild composition plot so that Unassigned are kept!!!

FG_tax <- ps_FG %>% tax_table() %>% as.data.frame()

unique(FG_tax$trophicMode)
## [1] NA                                  "Saprotroph-Symbiotroph"           
## [3] "Symbiotroph"                       "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph"             "Pathotroph"                       
## [7] "Saprotroph"                        "Pathotroph-Symbiotroph"

1.1. separate pure Plant pathogens

Note! In the composition figure:

  • Ectomycorrhizal = guilds containing “Ectomycorrhizal” from trophic mode Symbiotroph only, NOTE! thiis is same as pure Ectomycorrhizal!!
  • Arbuscular Mycorrhizal = all guilds containing “Arbuscular Mycorrhizal” from trophic mode Symbiotroph (nor AMF in other TMs)
  • Endophyte = Pure endophytes from trophic mode Symbiotroph only
  • Plant Pathogen = Pure Plant Pathogens from trophic mode Pathotroph only
z <- FG_tax

z <- z %>%  
   mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
z <- z %>%
  mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" |  FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" |  FG != "Arbuscular Mycorrhizal" |  FG != "Endophyte" | FG != "Plant Pathogen" ~ z$trophicMode))
# change some names for FUNGuild
z$FUNGuild[z$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
z$FUNGuild[z$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
# remove species and FG
tax <- z[, -c(10, 12)]

unique(tax$FUNGuild)
##  [1] NA                                  "Saprotroph-Symbiotroph"           
##  [3] "Arbuscular Mycorrhizal"            "Pathotroph-Saprotroph-Symbiotroph"
##  [5] "Pathotroph-Saprotroph"             "Other Pathotroph"                 
##  [7] "Saprotroph"                        "Endophyte"                        
##  [9] "Ectomycorrhizal"                   "Pathotroph-Symbiotroph"           
## [11] "Plant Pathogen"                    "Other Symbiotroph"
# rename FUNGuild to species
colnames(tax)[3] <- "species"
tax <- tax %>% as.matrix()

# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)

x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x               
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 12 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 12 taxa by 2 taxonomic ranks ]
# 12 taxa and 140 samples

# lets not remove NAs!!

# remove "Unknown"
#allTaxa = taxa_names(x)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#x <- prune_taxa(myTaxa, x)
#x
# would be 11 taxa and 140 samples

x_RA <- transform(x, 'compositional')

1.2. Plot

#create data table
df <-  psmelt(x_RA)

sampletype_names <- list(
  'forest' = "forest", 'meadow' = "meadow", 'organic' = "organic", 'conventional' = "conventional"
)

sampletype_labeller <- function(variable,value){
  return(sampletype_names[value])
}

df$species <- factor(df$species)

levels(df$species)
##  [1] "Arbuscular Mycorrhizal"            "Ectomycorrhizal"                  
##  [3] "Endophyte"                         "Other Pathotroph"                 
##  [5] "Other Symbiotroph"                 "Pathotroph-Saprotroph"            
##  [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"           
##  [9] "Plant Pathogen"                    "Saprotroph"                       
## [11] "Saprotroph-Symbiotroph"            "Unknown"
# change level order

df$species <- factor(df$species, levels = c("Unknown","Plant Pathogen", "Other Pathotroph", "Pathotroph-Saprotroph", "Pathotroph-Saprotroph-Symbiotroph", "Pathotroph-Symbiotroph", "Saprotroph", "Saprotroph-Symbiotroph", "Other Symbiotroph", "Ectomycorrhizal", "Endophyte", "Arbuscular Mycorrhizal"))
levels(df$species)
##  [1] "Unknown"                           "Plant Pathogen"                   
##  [3] "Other Pathotroph"                  "Pathotroph-Saprotroph"            
##  [5] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"           
##  [7] "Saprotroph"                        "Saprotroph-Symbiotroph"           
##  [9] "Other Symbiotroph"                 "Ectomycorrhizal"                  
## [11] "Endophyte"                         "Arbuscular Mycorrhizal"
#Create color palette

my_colour = list(
  "sample" = c(meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c"),
  depth = c('0...10' = "#387212", '10...20' = "#ADC476", '20...30' = "#D8D2BA",'30...40' = "#907852", '40...' = "#6A4C3A"),
  FUNGuild = c("Unknown" = "grey", "Plant Pathogen" = "deeppink", "Other Pathotroph" = "#d6849a", "Pathotroph-Saprotroph" = "#e3adbc", "Pathotroph-Saprotroph-Symbiotroph" = "#f1d6dd", "Pathotroph-Symbiotroph" = "#faf1f4", "Saprotroph" = "#CBBEAD", "Saprotroph-Symbiotroph" = "darkseagreen", "Other Symbiotroph" = "lightgreen", "Ectomycorrhizal" = "darkgreen", "Endophyte" = "#A2CF31", "Arbuscular Mycorrhizal" = "darkolivegreen1"))


# Make new depth variable

df$Depth <- df$depth

df$Depth <- gsub("...", "-", df$Depth, fixed = TRUE)

df$Depth[df$Depth=="40-"] <- "40-80"


df$Depth[df$Depth=="0-10"] <- "0-10 cm"
df$Depth[df$Depth=="10-20"] <- "10-20 cm"
df$Depth[df$Depth=="20-30"] <- "20-30 cm"
df$Depth[df$Depth=="30-40"] <- "30-40 cm"
df$Depth[df$Depth=="40-80"] <- "40-80 cm"

FG  <- ggplot(df, aes(x = Depth ,y = Abundance, fill = species)) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = my_colour[["FUNGuild"]]) + facet_grid (cols = vars(sample_type), labeller=sampletype_labeller) + geom_bar(stat="identity", position="fill") + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=14),
        axis.title = element_text(size=18),
        legend.text = element_text(size=16),
        legend.title = element_text(size=18),
        legend.spacing.y = unit(0, 'cm'),
        legend.key.size = unit(0.8, 'cm'),
        title = element_text(size=18))+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill = guide_legend(nrow = 4, title="")) + theme(strip.text.x = element_text(size = 22)) +
  ylab(label = "Relative abundance") + theme(legend.position="top") +  xlab("Depth")

FG

2. Class composition

ps_RA <- microbiome::transform(ps, 'compositional')

ps_RA.class <- aggregate_rare(ps_RA, level = 'class', detection = 3/100, prevalence = 3/140, include.lowest = TRUE)
ps_RA.class
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 16 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 16 taxa by 2 taxonomic ranks ]
#create data table
ps_RA.class_df <-  psmelt(ps_RA.class)

#Create color palette
#set color palette from RColorBrewer
# Define the number of colors you want
library("RColorBrewer") # nice color options
nb.cols = length(unique((as.data.frame(ps_RA.class@tax_table))$class))
cbbPalette <- colorRampPalette(brewer.pal(12, "Set3"))(nb.cols)

sampletype_names <- list(
  'forest' = "forest", 'meadow' = "meadow", 'organic' = "organic", 'conventional' = "conventional"
)

sampletype_labeller <- function(variable,value){
  return(sampletype_names[value])
}

# check unique values for class
unique(ps_RA.class_df$class)
##  [1] "Leotiomycetes"              "Agaricomycetes"            
##  [3] "Dothideomycetes"            "Sordariomycetes"           
##  [5] "Archaeosporomycetes"        "Mortierellomycetes"        
##  [7] "Geoglossomycetes"           "Tremellomycetes"           
##  [9] "Ascomycota_unclassified"    "Glomeromycetes"            
## [11] "Microbotryomycetes"         "Pezizomycetes"             
## [13] "Other"                      "Eurotiomycetes"            
## [15] "Basidiomycota_unclassified" "Orbiliomycetes"
is.factor(ps_RA.class_df$class)
## [1] FALSE
ps_RA.class_df$class <- as.factor(ps_RA.class_df$class)
levels(ps_RA.class_df$class)
##  [1] "Agaricomycetes"             "Archaeosporomycetes"       
##  [3] "Ascomycota_unclassified"    "Basidiomycota_unclassified"
##  [5] "Dothideomycetes"            "Eurotiomycetes"            
##  [7] "Geoglossomycetes"           "Glomeromycetes"            
##  [9] "Leotiomycetes"              "Microbotryomycetes"        
## [11] "Mortierellomycetes"         "Orbiliomycetes"            
## [13] "Other"                      "Pezizomycetes"             
## [15] "Sordariomycetes"            "Tremellomycetes"
ps_RA.class_df$class <- relevel(ps_RA.class_df$class, "Other")

cbbPalette <- c("#b2b2b2", "#8DD3C7", "#FFED6F", "#CAAEC5", "#F68378", "#8D6942", "#F3B962", "#BCD868", "#6E99BE", "#F0D1E1", "#C191C2", "#FFFFC6", "darkgreen", "#D0D9CD", "#8BC081", "#FF8DB5")



# plot with detection = 1/100, prevalence = 2/100
#Create a plot
classF  <- ggplot(ps_RA.class_df, aes(x = depth ,y = Abundance, fill = class)) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + facet_grid (cols = vars(sample_type), labeller=sampletype_labeller) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=14),
        axis.title = element_text(size=18),
        legend.text = element_text(size=16),
        legend.title = element_text(size=18),
        legend.spacing.y = unit(0, 'cm'),
        legend.key.size = unit(0.8, 'cm'),
        title = element_text(size=18))+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill = guide_legend(nrow = 6, title="")) + theme(strip.text.x = element_text(size = 22)) +
  ylab(label = "Relative abundance") + theme(legend.position="top") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.x = element_blank())

classF

3. Phylum composition

ps_RA_phyla_aggr <- aggregate_rare(ps_RA, level = 'phylum', detection = 2/100, prevalence = 2/140)
ps_RA_phyla_aggr
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 6 taxa by 2 taxonomic ranks ]
#create data table
ps_RA_phyla_df <-  psmelt(ps_RA_phyla_aggr)

#Create color palette
cbbPalette <- c("#666666","#1B9E77", "#D95F02", "#E7298A", "#7570B3", "#66A61E")

sampletype_names <- list(
  'forest' = "forest", 'meadow' = "meadow", 'organic' = "organic", 'conventional' = "conventional"
)

sampletype_labeller <- function(variable,value){
  return(sampletype_names[value])
}


# check unique values for phylum
unique(ps_RA_phyla_df$phylum)
## [1] "Ascomycota"        "Basidiomycota"     "Glomeromycota"    
## [4] "Mortierellomycota" "Rozellomycota"     "Other"
# [1] "Ascomycota"        "Basidiomycota"     "Glomeromycota"     "Mortierellomycota"
# [5] "Rozellomycota"     "Other"  

ps_RA_phyla_df$phylum <- as.factor(ps_RA_phyla_df$phylum)
levels(ps_RA_phyla_df$phylum)
## [1] "Ascomycota"        "Basidiomycota"     "Glomeromycota"    
## [4] "Mortierellomycota" "Other"             "Rozellomycota"
ps_RA_phyla_df$phylum <- factor(ps_RA_phyla_df$phylum, levels = c("Other","Ascomycota", "Basidiomycota", "Glomeromycota", "Mortierellomycota", "Rozellomycota"))
levels(ps_RA_phyla_df$phylum)
## [1] "Other"             "Ascomycota"        "Basidiomycota"    
## [4] "Glomeromycota"     "Mortierellomycota" "Rozellomycota"
#Create a plot
phylumF  <- ggplot(ps_RA_phyla_df, aes(x = depth ,y = Abundance, fill = phylum)) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + facet_grid (cols = vars(sample_type), labeller=sampletype_labeller) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=14),
        axis.title = element_text(size=18),
        legend.text = element_text(size=16),
        legend.title = element_text(size=18),
        legend.spacing.y = unit(0, 'cm'),
        legend.key.size = unit(0.8, 'cm'),
        title = element_text(size=18))+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill = guide_legend(title="")) + theme(strip.text.x = element_text(size = 22)) +
  ylab(label = "Relative abundance") + theme(legend.position="top") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.x = element_blank())

phylumF

D) Combine composition plots and heatmap figures

1.1. Composition bar plots

library(ggplotify)


left <- ggarrange(phylumF, classF , FG,
                  labels = c("A", "B", "C"),
                  ncol = 1, nrow = 3, heights = c(1.1, 1.5, 1.5))

left

2. Combine Heatmaps

right <- ggarrange(p2, p3,
                       labels = c("D", "E"),
                       ncol = 1, nrow = 2, heights = c(2.7, 1.4))

right

3. Combine composition and heatmap

figure <- ggarrange(left, right,
                    ncol = 2, nrow = 1, heights = c(1, 1), widths = c(1, 1))

figure

Saved with width 2200 and height 2600


RESULTS STEPS 7:FUNGuild testing

Note! While doing tests, always check the homogeneity of variance result and decide based on that which later test result to use (Anova and Tukey or Kruskal and Wilcoxon)

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]

1. Test Plant Pathogen

Note:

# remove species

z <- as.data.frame(tax_table(ps_FG))

tax <- z[, 1:9]

# rename FUNGuild to species
colnames(tax)[3] <- "species"
tax <- tax %>% as.matrix()

# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)

x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x               
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 12 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 12 taxa by 2 taxonomic ranks ]
# 12 taxa and 140 samples


# remove "Unknown"
#allTaxa = taxa_names(x)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#x <- prune_taxa(myTaxa, x)
#x
# 11 taxa and 140 samples

x_RA <- transform(x, 'compositional')
#create data table
FG_df <-  psmelt(x_RA)
unique(FG_df$OTU)
##  [1] "Saprotroph"                        "Saprotroph-Symbiotroph"           
##  [3] "Unknown"                           "Ectomycorrhizal"                  
##  [5] "Arbuscular Mycorrhizal"            "Pathotroph-Saprotroph"            
##  [7] "Pathotroph-Symbiotroph"            "Endophyte"                        
##  [9] "Pathotroph-Saprotroph-Symbiotroph" "Other Pathotroph"                 
## [11] "Plant Pathogen"                    "Other Symbiotroph"
taxa <- "Plant Pathogen"

# Construct a data.frame with the selected
# taxonomic group and grouping
# relative
df <- filter(FG_df, OTU == taxa)

library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   3   6.659 0.0003143 ***
##       136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

not homogenous

# perform the Kruskal test
kruskal.test(Abundance ~ sample_type, data = df)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 27.409, df = 3, p-value = 4.833e-06
pairwise.wilcox.test(df$Abundance, df$sample_type,
                     p.adjust.method = "BH")
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow  organic
## meadow       0.12765 -       -      
## organic      0.00090 0.00090 -      
## conventional 0.00074 0.00074 0.65964
## 
## P value adjustment method: BH
x <- df %>%
  group_by(sample_type) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 4 × 3
##   sample_type     mean      se
##   <fct>          <dbl>   <dbl>
## 1 forest       0.00392 0.00110
## 2 meadow       0.0120  0.00406
## 3 organic      0.0326  0.00450
## 4 conventional 0.0291  0.00399

2. Test Endophyte

unique(FG_df$OTU)
##  [1] "Saprotroph"                        "Saprotroph-Symbiotroph"           
##  [3] "Unknown"                           "Ectomycorrhizal"                  
##  [5] "Arbuscular Mycorrhizal"            "Pathotroph-Saprotroph"            
##  [7] "Pathotroph-Symbiotroph"            "Endophyte"                        
##  [9] "Pathotroph-Saprotroph-Symbiotroph" "Other Pathotroph"                 
## [11] "Plant Pathogen"                    "Other Symbiotroph"
taxa <- "Endophyte"

# Construct a data.frame with the selected
# taxonomic group and grouping
# relative
df <- filter(FG_df, OTU == taxa)

# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  1.0924 0.3547
##       136
# perform the Kruskal test
kruskal.test(Abundance ~ sample_type, data = df)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 17.125, df = 3, p-value = 0.0006663
pairwise.wilcox.test(df$Abundance, df$sample_type,
                     p.adjust.method = "BH")
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.4489 -      -      
## organic      0.0090 0.0084 -      
## conventional 0.0144 0.0123 0.9508 
## 
## P value adjustment method: BH
x <- df %>%
  group_by(sample_type) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 4 × 3
##   sample_type     mean      se
##   <fct>          <dbl>   <dbl>
## 1 forest       0.00956 0.00571
## 2 meadow       0.0113  0.00320
## 3 organic      0.0299  0.00821
## 4 conventional 0.0243  0.00641

3. Test Trophic modes

# remove species

z <- as.data.frame(tax_table(ps_FG))

tax <- z[, 1:9]

# rename trophicmode to species
colnames(tax)[1] <- "species"
tax <- tax %>% as.matrix()

# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)

x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x               
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 8 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa and 140 samples


# remove "Unknown"
#allTaxa = taxa_names(x)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#x <- prune_taxa(myTaxa, x)
#x
# 7 taxa and 140 samples

x_RA <- transform(x, 'compositional')
#create data table
FG_df <-  psmelt(x_RA)
unique(FG_df$species)
## [1] "Saprotroph"                        "Saprotroph-Symbiotroph"           
## [3] "Unknown"                           "Symbiotroph"                      
## [5] "Pathotroph-Saprotroph"             "Pathotroph-Symbiotroph"           
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph"
FG_df$species <- as.factor(FG_df$species)

for (i in levels(FG_df$species)) {
  df <- filter(FG_df, species == i)

  print(i)
  result = leveneTest(Abundance ~ sample_type, df)
  print(result)
  k <- kruskal.test(Abundance ~ sample_type, data = df)
  print(k)
  w <- pairwise.wilcox.test(df$Abundance, df$sample_type,
                     p.adjust.method = "BH")
  print(w)
  res.aov <- aov(Abundance ~ sample_type, data = df)
  aov <- summary(res.aov)
  print(aov)
  tukey <- TukeyHSD(res.aov)
  print(tukey)
  }
## [1] "Pathotroph"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   3  14.948 1.826e-08 ***
##       136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 24.347, df = 3, p-value = 2.115e-05
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.0506 -      -      
## organic      0.0021 0.0042 -      
## conventional 0.0012 0.0012 0.3715 
## 
## P value adjustment method: BH 
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## sample_type   3 0.1020 0.03400   11.43 9.88e-07 ***
## Residuals   136 0.4046 0.00297                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                            diff         lwr        upr     p adj
## meadow-forest        0.01618704 -0.02676495 0.05913903 0.7609271
## organic-forest       0.05980597  0.01685398 0.10275796 0.0023053
## conventional-forest  0.07068982  0.02839362 0.11298602 0.0001562
## organic-meadow       0.04361893  0.01189678 0.07534108 0.0026909
## conventional-meadow  0.05450278  0.02367439 0.08533116 0.0000565
## conventional-organic 0.01088385 -0.01994454 0.04171223 0.7951192
## 
## [1] "Pathotroph-Saprotroph"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  0.6236 0.6009
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 16.639, df = 3, p-value = 0.0008384
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow  organic
## meadow       0.97764 -       -      
## organic      0.48530 0.22650 -      
## conventional 0.04408 0.00032 0.04408
## 
## P value adjustment method: BH 
##              Df Sum Sq Mean Sq F value Pr(>F)
## sample_type   3  0.062 0.02067   2.098  0.103
## Residuals   136  1.340 0.00985               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                               diff          lwr        upr     p adj
## meadow-forest        -0.0272793473 -0.105437495 0.05087880 0.8006894
## organic-forest        0.0001389386 -0.078019209 0.07829709 1.0000000
## conventional-forest   0.0268088430 -0.050155981 0.10377367 0.8016557
## organic-meadow        0.0274182860 -0.030305332 0.08514190 0.6054541
## conventional-meadow   0.0540881903 -0.002009083 0.11018546 0.0631532
## conventional-organic  0.0266699044 -0.029427369 0.08276718 0.6047453
## 
## [1] "Pathotroph-Saprotroph-Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   3  16.618 2.955e-09 ***
##       136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 12.707, df = 3, p-value = 0.005316
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.148  -      -      
## organic      0.303  0.023  -      
## conventional 0.442  0.012  0.159  
## 
## P value adjustment method: BH 
##              Df  Sum Sq  Mean Sq F value   Pr(>F)    
## sample_type   3 0.05287 0.017622   9.025 1.72e-05 ***
## Residuals   136 0.26556 0.001953                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                             diff          lwr         upr     p adj
## meadow-forest        -0.02117396 -0.055973028  0.01362511 0.3919150
## organic-forest        0.02920514 -0.005593926  0.06400421 0.1331853
## conventional-forest  -0.00451020 -0.038777956  0.02975755 0.9861429
## organic-meadow        0.05037910  0.024678286  0.07607992 0.0000067
## conventional-meadow   0.01666376 -0.008312946  0.04164046 0.3095042
## conventional-organic -0.03371534 -0.058692048 -0.00873864 0.0033576
## 
## [1] "Pathotroph-Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3   1.342 0.2634
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 10.052, df = 3, p-value = 0.01813
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.104  -      -      
## organic      0.736  0.104  -      
## conventional 0.736  0.023  0.394  
## 
## P value adjustment method: BH 
##              Df Sum Sq  Mean Sq F value Pr(>F)
## sample_type   3 0.0032 0.001068    1.35  0.261
## Residuals   136 0.1076 0.000791               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                               diff         lwr         upr     p adj
## meadow-forest         0.0110798586 -0.01106945 0.033229164 0.5637908
## organic-forest        0.0082373461 -0.01391196 0.030386651 0.7682090
## conventional-forest   0.0003225014 -0.02148863 0.022133630 0.9999795
## organic-meadow       -0.0028425125 -0.01920086 0.013515834 0.9691437
## conventional-meadow  -0.0107573571 -0.02665481 0.005140098 0.2971540
## conventional-organic -0.0079148447 -0.02381230 0.007982610 0.5676954
## 
## [1] "Saprotroph"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value   Pr(>F)   
## group   3  4.0842 0.008198 **
##       136                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 31.546, df = 3, p-value = 6.522e-07
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow  organic
## meadow       5.2e-05 -       -      
## organic      0.161   9.4e-06 -      
## conventional 0.057   8.4e-05 0.388  
## 
## P value adjustment method: BH 
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## sample_type   3  1.746  0.5819   15.82 7.01e-09 ***
## Residuals   136  5.003  0.0368                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                             diff         lwr         upr     p adj
## meadow-forest         0.30830047  0.15726083  0.45934011 0.0000026
## organic-forest        0.06074104 -0.09029860  0.21178068 0.7226932
## conventional-forest   0.10108566 -0.04764790  0.24981922 0.2933518
## organic-meadow       -0.24755943 -0.35910960 -0.13600926 0.0000003
## conventional-meadow  -0.20721481 -0.31562209 -0.09880753 0.0000116
## conventional-organic  0.04034462 -0.06806266  0.14875190 0.7678339
## 
## [1] "Saprotroph-Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   3  2.4348 0.06754 .
##       136                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 0.10016, df = 3, p-value = 0.9918
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.99   -      -      
## organic      0.99   0.99   -      
## conventional 0.99   0.99   0.99   
## 
## P value adjustment method: BH 
##              Df Sum Sq Mean Sq F value Pr(>F)
## sample_type   3 0.0623 0.02076   0.921  0.433
## Residuals   136 3.0669 0.02255               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                             diff        lwr        upr     p adj
## meadow-forest        -0.03010025 -0.1483602 0.08815971 0.9111192
## organic-forest       -0.00292569 -0.1211857 0.11533427 0.9999039
## conventional-forest  -0.05189731 -0.1683517 0.06455705 0.6535488
## organic-meadow        0.02717456 -0.0601662 0.11451533 0.8499874
## conventional-meadow  -0.02179705 -0.1066770 0.06308292 0.9089813
## conventional-organic -0.04897162 -0.1338516 0.03590835 0.4399103
## 
## [1] "Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   3   14.22 4.099e-08 ***
##       136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 13.84, df = 3, p-value = 0.003131
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.0022 -      -      
## organic      0.0028 0.5867 -      
## conventional 0.0026 0.8775 0.8775 
## 
## P value adjustment method: BH 
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## sample_type   3 0.5949 0.19829   13.83 6.34e-08 ***
## Residuals   136 1.9500 0.01434                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                             diff         lwr         upr     p adj
## meadow-forest        -0.21179677 -0.30609492 -0.11749863 0.0000002
## organic-forest       -0.19023387 -0.28453201 -0.09593572 0.0000034
## conventional-forest  -0.21889086 -0.31174926 -0.12603247 0.0000001
## organic-meadow        0.02156291 -0.04808089  0.09120671 0.8518287
## conventional-meadow  -0.00709409 -0.07477570  0.06058752 0.9928863
## conventional-organic -0.02865700 -0.09633860  0.03902461 0.6893331
## 
## [1] "Unknown"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  1.9805 0.1198
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 13.761, df = 3, p-value = 0.003249
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow  organic
## meadow       0.62012 -       -      
## organic      0.62012 0.15804 -      
## conventional 0.26905 0.00055 0.15804
## 
## P value adjustment method: BH 
##              Df Sum Sq Mean Sq F value Pr(>F)  
## sample_type   3  0.328 0.10930   3.305 0.0223 *
## Residuals   136  4.498 0.03307                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ sample_type, data = df)
## 
## $sample_type
##                             diff         lwr        upr     p adj
## meadow-forest        -0.04521703 -0.18843861 0.09800454 0.8443579
## organic-forest        0.03503112 -0.10819045 0.17825269 0.9201414
## conventional-forest   0.07639155 -0.06464330 0.21742640 0.4960123
## organic-meadow        0.08024815 -0.02552799 0.18602429 0.2031930
## conventional-meadow   0.12160859  0.01881266 0.22440452 0.0133339
## conventional-organic  0.04136044 -0.06143550 0.14415637 0.7223811
x <- FG_df %>% group_by(species, sample_type) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 32 × 4
## # Groups:   species [8]
##    species                           sample_type    mean      se
##    <fct>                             <fct>         <dbl>   <dbl>
##  1 Pathotroph                        forest       0.0104 0.00378
##  2 Pathotroph                        meadow       0.0266 0.00556
##  3 Pathotroph                        organic      0.0702 0.00985
##  4 Pathotroph                        conventional 0.0811 0.0101 
##  5 Pathotroph-Saprotroph             forest       0.0841 0.0332 
##  6 Pathotroph-Saprotroph             meadow       0.0568 0.0102 
##  7 Pathotroph-Saprotroph             organic      0.0842 0.0153 
##  8 Pathotroph-Saprotroph             conventional 0.111  0.0171 
##  9 Pathotroph-Saprotroph-Symbiotroph forest       0.0315 0.0153 
## 10 Pathotroph-Saprotroph-Symbiotroph meadow       0.0104 0.00266
## # ℹ 22 more rows

4. Trophic modes in forest deepest layer

ps_x <- subset_samples(ps_FG, sample_type=="forest" & depth=="40...")
ps_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 3 samples ]
## sample_data() Sample Data:       [ 3 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]
ps_FG_Tm <- aggregate_rare(ps_x, level = "trophicMode", detection = 0, prevalence = 0)
ps_FG_Tm
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 8 taxa and 3 samples ]
## sample_data() Sample Data:       [ 3 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa and 15 samples

# remove "Unknown"
#allTaxa = taxa_names(ps_FG_Tm)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#ps_FG_Tm <- prune_taxa(myTaxa, ps_FG_Tm)
#ps_FG_Tm
# 7 taxa and 15 samples

ps_FG_Tm_RA <- microbiome::transform(ps_FG_Tm, "compositional")

FG_df <- psmelt(ps_FG_Tm_RA)

x <- FG_df %>%
  group_by(OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

x
## # A tibble: 8 × 3
##   OTU                                  mean      se
##   <chr>                               <dbl>   <dbl>
## 1 Pathotroph                        0.00245 0.00245
## 2 Pathotroph-Saprotroph             0.172   0.159  
## 3 Pathotroph-Saprotroph-Symbiotroph 0.00880 0.00880
## 4 Pathotroph-Symbiotroph            0       0      
## 5 Saprotroph                        0.00920 0.00898
## 6 Saprotroph-Symbiotroph            0.0130  0.0129 
## 7 Symbiotroph                       0.464   0.0574 
## 8 Unknown                           0.331   0.166

7. Test symbiotrophs between soillayers

# remove species

z <- as.data.frame(tax_table(ps_FG))

tax <- z[, 1:9]

# rename trophic mode to species
colnames(tax)[1] <- "species"

tax <- tax %>% as.matrix()

# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)

x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x               
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 8 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa and 140 samples


# remove "Unknown"
#allTaxa = taxa_names(x)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#x <- prune_taxa(myTaxa, x)
#x
# 2 taxa and 140 samples

x_RA <- transform(x, 'compositional')
#create data table
FG_df <-  psmelt(x_RA)
# remove forest
FG_df <- subset(FG_df, sample_type!="forest" | sample_type!="meadow")

unique(FG_df$species)
## [1] "Saprotroph"                        "Saprotroph-Symbiotroph"           
## [3] "Unknown"                           "Symbiotroph"                      
## [5] "Pathotroph-Saprotroph"             "Pathotroph-Symbiotroph"           
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph"
df <- filter(FG_df, species == "Symbiotroph")

result = leveneTest(Abundance ~ depth, df)
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   4  8.1269 6.674e-06 ***
##       135                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 21.947, df = 4, p-value = 0.0002054
w <- pairwise.wilcox.test(df$Abundance, df$depth,
                     p.adjust.method = "BH")
print(w)
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$depth 
## 
##         0...10  10...20 20...30 30...40
## 10...20 0.0038  -       -       -      
## 20...30 6.6e-05 0.0134  -       -      
## 30...40 0.0009  0.0118  0.2752  -      
## 40...   0.2752  0.5454  0.7618  0.6229 
## 
## P value adjustment method: BH
x <- df %>% group_by(depth) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 5 × 3
##   depth     mean      se
##   <chr>    <dbl>   <dbl>
## 1 0...10  0.0157 0.00272
## 2 10...20 0.0279 0.00575
## 3 20...30 0.0560 0.0123 
## 4 30...40 0.151  0.0413 
## 5 40...   0.126  0.0305

8. Test saprotrophs between soillayers

# remove species

z <- as.data.frame(tax_table(ps_FG))

tax <- z[, 1:9]

# rename trophic mode to species
colnames(tax)[1] <- "species"

tax <- tax %>% as.matrix()

# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)

x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x               
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 8 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa and 140 samples


# remove "Unknown"
#allTaxa = taxa_names(x)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#x <- prune_taxa(myTaxa, x)
#x
# 2 taxa and 140 samples

x_RA <- transform(x, 'compositional')
#create data table
FG_df <-  psmelt(x_RA)
# remove forest
FG_df <- subset(FG_df, sample_type!="forest" & sample_type!="meadow")

unique(FG_df$species)
## [1] "Saprotroph-Symbiotroph"            "Saprotroph"                       
## [3] "Unknown"                           "Symbiotroph"                      
## [5] "Pathotroph-Saprotroph"             "Pathotroph-Symbiotroph"           
## [7] "Pathotroph"                        "Pathotroph-Saprotroph-Symbiotroph"
df <- filter(FG_df, species == "Saprotroph")

result = leveneTest(Abundance ~ depth, df)
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  4  2.5144 0.04792 *
##       80                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 24.835, df = 4, p-value = 5.43e-05
w <- pairwise.wilcox.test(df$Abundance, df$depth,
                     p.adjust.method = "BH")
print(w)
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$depth 
## 
##         0...10  10...20 20...30 30...40
## 10...20 0.73386 -       -       -      
## 20...30 0.25672 0.25672 -       -      
## 30...40 0.00014 0.00014 0.00289 -      
## 40...   0.02354 0.01348 0.16448 0.73386
## 
## P value adjustment method: BH
x <- df %>% group_by(depth) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 5 × 3
##   depth    mean     se
##   <chr>   <dbl>  <dbl>
## 1 0...10  0.308 0.0280
## 2 10...20 0.311 0.0173
## 3 20...30 0.251 0.0331
## 4 30...40 0.112 0.0310
## 5 40...   0.199 0.0633

9 Test Trophic modes in soillayers

# remove species

z <- as.data.frame(tax_table(ps_FG))

tax <- z[, 1:9]


# rename trophic mode to species
colnames(tax)[1] <- "species"

tax <- tax %>% as.matrix()

# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)

x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x               
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 8 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa and 140 samples


# remove "Unknown"
#allTaxa = taxa_names(x)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#x <- prune_taxa(myTaxa, x)
#x
# 2 taxa and 140 samples

x_RA <- transform(x, 'compositional')
#create data table
FG_df <-  psmelt(x_RA)

unique(FG_df$species)
## [1] "Saprotroph"                        "Saprotroph-Symbiotroph"           
## [3] "Unknown"                           "Symbiotroph"                      
## [5] "Pathotroph-Saprotroph"             "Pathotroph-Symbiotroph"           
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph"
FG_df <- filter(FG_df, sample_type != "forest")
FG_df <- droplevels(FG_df)

# make factor
FG_df$species <- as.factor(FG_df$species)
FG_df$depth <- as.factor(FG_df$depth)

#create data frame with 0 rows and 6 columns
df_test <- data.frame(matrix(ncol = 6))

#provide column names
colnames(df_test) <- c('sample_type', 'species', 'depth', 'LevenesP', 'ANOVAsOrKrusalsP', 'mean_se_cld')

for (i in levels(FG_df$depth)) {
  for (j in levels(FG_df$species)) {
    df <- filter(FG_df, depth == i & species == j)
    levene <- leveneTest(Abundance ~ sample_type, df)
    if (levene[1,3]>0.05){
      res.aov <- aov(Abundance ~ sample_type, data = df)
    res.aov2 <- summary(res.aov)
    res.aov2 <- res.aov2[[1]]
    df.res.aov <- data.frame(matrix(ncol = 3, nrow =3))
    colnames(df.res.aov) <- c('sample_type', 'LevenesP', 'ANOVAsOrKrusalsP')
    df.res.aov$sample_type <- levels(FG_df$sample_type)
    df.res.aov$LevenesP <- levene[1,3]
    df.res.aov$ANOVAsOrKrusalsP <- res.aov2[1,5]
    df.res.aov$LevenesP <- sprintf("%.3f", round(df.res.aov$LevenesP, digits = 3))
    df.res.aov$ANOVAsOrKrusalsP <- sprintf("%.3f", round(df.res.aov$ANOVAsOrKrusalsP, digits = 3))
    
    tukey <- glht(res.aov, linfct=mcp(sample_type="Tukey"))
    cld <- cld(tukey)
    cld <- cld[["mcletters"]][["Letters"]]
    cld <- as.data.frame(cld)
    cld$sample_type <- row.names(cld)
    row.names(cld) <- NULL
    } else {
      res.aov2 <- kruskal.test(Abundance ~ sample_type, data = df)
    df.res.aov <- data.frame(matrix(ncol = 3, nrow =3))
    colnames(df.res.aov) <- c('sample_type', 'LevenesP', 'ANOVAsOrKrusalsP')
    df.res.aov$sample_type <- levels(FG_df$sample_type)
    df.res.aov$LevenesP <- levene[1,3]
    df.res.aov$ANOVAsOrKrusalsP <- res.aov2[["p.value"]]
    df.res.aov$LevenesP <- sprintf("%.3f", round(df.res.aov$LevenesP, digits = 3))
    df.res.aov$ANOVAsOrKrusalsP <- sprintf("%.3f", round(df.res.aov$ANOVAsOrKrusalsP, digits = 3))
    wilcox.res <- pairwise.wilcox.test(df$Abundance, df$sample_type,
                     p.adjust.method = "BH")
    wilcox.res <- wilcox.res[["p.value"]]
    
    cld = fullPTable(wilcox.res)
    cld[is.na(cld)] <- 0
    cld <- multcompLetters(cld)
    cld <- cld[["Letters"]]
    cld <- as.data.frame(cld)
    cld$sample_type <- row.names(cld)
    row.names(cld) <- NULL
    }
    
    x <- df %>%
      dplyr::group_by(sample_type) %>%
      dplyr::summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
    
    x$mean <- sprintf("%.3f", round(x$mean, digits = 3))
    x$se <- sprintf("%.3f", round(x$se, digits = 3))
    x <- cbind(species = 0, x)
    x$species <- j
    x$depth <- i
    
    output <- merge(x, df.res.aov, by='sample_type')
    output <- merge(output, cld, by='sample_type')
    
    output$mean_se = paste(output$mean, output$se, sep="±")
    output$mean_se_cld = paste(output$mean_se, output$cld, sep="")
    output <- output[, -c(3,4,8,9)]
    
    df_test <- rbind(df_test, output)
  }
}
df_test <- na.omit(df_test)

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

#write.csv2(df_test, file = "test.csv")

RESULTS STEPS 8: Spearman correlations of depth with 5 most abundant phyla, classes, genera and trophic mode

Note all correlations are done without forest!

library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]

Spearman rank correlation with depth without forest

1. Trophic modes

1.1. Remove forest

# this aggregation makes NAs into "Unknown"
ps_FG_x <- aggregate_rare(ps_FG, level = 'trophicMode', detection = 0/100, prevalence = 0/140)
ps_FG_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 8 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa

# remove "Unknown"
#allTaxa = taxa_names(ps_FG_x)
#badTaxa = c("Unknown")
#myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
#ps_FG_x_pruned <- prune_taxa(myTaxa, ps_FG_x)
#ps_FG_x_pruned
# 7 taxa

FG_nf <- subset_samples(ps_FG_x, sample_type != "forest")
FG_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 8 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 8 taxa by 1 taxonomic ranks ]
FG_RA_nf <- transform(FG_nf, "compositional")
FG_RA_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 8 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 8 taxa by 1 taxonomic ranks ]
df <- psmelt(FG_RA_nf)

1.2. Five most abundant Trophic modes

df$year <- "2019"

# sum the values if same genus: Sum by Group Based on dplyr Package

x <- df %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:6)
y
## # A tibble: 6 × 4
## # Groups:   year [1]
##   year  OTU                      mean      se
##   <chr> <chr>                   <dbl>   <dbl>
## 1 2019  Saprotroph             0.309  0.0199 
## 2 2019  Unknown                0.307  0.0166 
## 3 2019  Saprotroph-Symbiotroph 0.147  0.0129 
## 4 2019  Pathotroph-Saprotroph  0.0851 0.00869
## 5 2019  Pathotroph             0.0602 0.00551
## 6 2019  Symbiotroph            0.0531 0.00847

1.3. Test for each Trophic mode separately

taxa <- y$OTU

for (i in taxa) {
 df_x <- subset(df, OTU == i)
 print(i)
 x <- cor.test(df_x$Abundance, df_x$depth_numerical,  method = "spearman", na.rm = TRUE)
 print(x)
}
## [1] "Saprotroph"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 473587, p-value = 9.808e-08
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.4549534 
## 
## [1] "Unknown"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 187721, p-value = 8.737e-07
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4232854 
## 
## [1] "Saprotroph-Symbiotroph"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 360251, p-value = 0.236
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## -0.106762 
## 
## [1] "Pathotroph-Saprotroph"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 304017, p-value = 0.4646
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.06600116 
## 
## [1] "Pathotroph"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 540182, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.6595454 
## 
## [1] "Symbiotroph"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 241758, p-value = 0.003775
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2572713

2. Phyla

2.1. Remove forest

ps_nf <- subset_samples(ps, sample_type != "forest")
ps_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf <- transform(ps_nf, "compositional")
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf_phy <- aggregate_rare(ps_RA_nf, level = "phylum", detection = 0.0, prevalence = 0.0)
ps_RA_nf_phy
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 14 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 14 taxa by 1 taxonomic ranks ]
df <- psmelt(ps_RA_nf_phy)

2.2. Five most abundant phyla

df$year <- "2019"

x <- df %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU                  mean       se
##   <chr> <chr>               <dbl>    <dbl>
## 1 2019  Ascomycota        0.692   0.0151  
## 2 2019  Basidiomycota     0.199   0.0134  
## 3 2019  Mortierellomycota 0.0793  0.00956 
## 4 2019  Glomeromycota     0.0264  0.00683 
## 5 2019  Chytridiomycota   0.00209 0.000362

2.3. Test for each phyla separately

taxa <- y$OTU

for (i in taxa) {
  df_x <- subset(df, OTU == i)
  print(i)
  x <- cor.test(df_x$Abundance, df_x$depth_numerical,  method = "spearman", na.rm = TRUE)
  print(x)
  }
## [1] "Ascomycota"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 298097, p-value = 0.3506
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.08418676 
## 
## [1] "Basidiomycota"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 491192, p-value = 1.347e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.5090399 
## 
## [1] "Mortierellomycota"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 245027, p-value = 0.005442
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##     rho 
## 0.24723 
## 
## [1] "Glomeromycota"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 257422, p-value = 0.01924
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2091478 
## 
## [1] "Chytridiomycota"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 569974, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.7510735

3. Classes

3.1. Remove forest

ps_nf <- subset_samples(ps, sample_type != "forest")
ps_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf <- transform(ps_nf, "compositional")
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf_cla <- aggregate_rare(ps_RA_nf, level = "class", detection = 0.0, prevalence = 0.0)
ps_RA_nf_cla
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 65 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 65 taxa by 2 taxonomic ranks ]
df <- psmelt(ps_RA_nf_cla)

3.2. Five most abundant phyla

df$year <- "2019"

x <- df %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU                  mean      se
##   <chr> <chr>               <dbl>   <dbl>
## 1 2019  Leotiomycetes      0.291  0.0218 
## 2 2019  Sordariomycetes    0.185  0.0131 
## 3 2019  Dothideomycetes    0.137  0.0140 
## 4 2019  Tremellomycetes    0.132  0.0111 
## 5 2019  Mortierellomycetes 0.0793 0.00956

3.3. Test for each class separately

taxa <- y$OTU

for (i in taxa) {
  df_x <- subset(df, OTU == i)
  print(i)
  x <- cor.test(df_x$Abundance, df_x$depth_numerical,  method = "spearman", na.rm = TRUE)
  print(x)
}
## [1] "Leotiomycetes"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 139957, p-value = 3.966e-12
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5700243 
## 
## [1] "Sordariomycetes"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 426742, p-value = 0.0004147
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.3110364 
## 
## [1] "Dothideomycetes"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 548039, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.6836843 
## 
## [1] "Tremellomycetes"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 558143, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.7147252 
## 
## [1] "Mortierellomycetes"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 245027, p-value = 0.005442
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##     rho 
## 0.24723

4. Genera

4.1. Remove forest

ps_nf <- subset_samples(ps, sample_type != "forest")
ps_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf <- transform(ps_nf, "compositional")
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf_gen <- aggregate_rare(ps_RA_nf, level = "genus", detection = 0.0, prevalence = 0.0)
ps_RA_nf_gen
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 895 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 895 taxa by 2 taxonomic ranks ]
df <- psmelt(ps_RA_nf_gen)

4.2. Five most abundant genera

Note! Exclude the obes not classified at genus level!

df$year <- "2019"

x <- df %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:6)
print(y)
## # A tibble: 6 × 4
## # Groups:   year [1]
##   year  OTU                          mean      se
##   <chr> <chr>                       <dbl>   <dbl>
## 1 2019  Leotiomycetes_unclassified 0.109  0.0168 
## 2 2019  Saitozyma                  0.0664 0.00702
## 3 2019  Pseudeurotium              0.0533 0.0122 
## 4 2019  Paraphaeosphaeria          0.0516 0.0110 
## 5 2019  Mortierella                0.0478 0.00662
## 6 2019  Solicoccozyma              0.0463 0.00584

4.3. Test for each genera separately

We don’t want to test for Leotiomycetes_unclassified

taxa <- y$OTU
taxa[! taxa %in% c("Leotiomycetes_unclassified")]
## [1] "Saitozyma"         "Pseudeurotium"     "Paraphaeosphaeria"
## [4] "Mortierella"       "Solicoccozyma"
for (i in taxa) {
  df_x <- subset(df, OTU == i)
  print(i)
  x <- cor.test(df_x$Abundance, df_x$depth_numerical,  method = "spearman", na.rm = TRUE)
  print(x)
}
## [1] "Leotiomycetes_unclassified"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 186445, p-value = 6.745e-07
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4272047 
## 
## [1] "Saitozyma"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 549794, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.6890738 
## 
## [1] "Pseudeurotium"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 329787, p-value = 0.8841
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## -0.01316959 
## 
## [1] "Paraphaeosphaeria"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 565995, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.7388476 
## 
## [1] "Mortierella"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 239975, p-value = 0.003073
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2627505 
## 
## [1] "Solicoccozyma"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$depth_numerical
## S = 545184, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.6749137

RESULTS STEP 9: Calculate the average number of reads and OTUs and the total number of OTUs in each soil layer

library('phyloseq')
library("dplyr")
library("tibble")
library("microbiome")
library("tibble")

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)

OTU <- as.data.frame(otu_table(ps))
OTU <- t(OTU)

# calculate reads per sample:
OTU2 <- OTU
OTU2 <- as.data.frame(OTU2)
OTU2$reads <- rowSums(OTU2)

# add to meta

meta$reads <- OTU2$reads

OTU <- as.matrix(OTU)

#convert an abundance matrix to a presence-absence matrix. (make any number greater than 0 into a 1)
OTU[OTU > 0] <- 1 #converts from abundance to P/A
OTU <- as.data.frame(OTU)
# calculate number of different otus in a sample
OTU$OTUs <- rowSums(OTU)

# add to meta
meta$OTUs <- OTU$OTUs
rm(OTU)

# save new meta
sample_data(ps) <- sample_data(meta)
#save(ps, file = 'ps_FINAL')

mean(meta$OTUs)
## [1] 1480.243
# 1480.243

# mean values of OTUs in soil layers

x <- meta %>%
  dplyr::group_by(depth) %>%
  dplyr::summarise(OTUs_mean = mean(OTUs, na.rm = TRUE), OTUs_se = (sd(OTUs, na.rm = TRUE)/sqrt(length((OTUs)))))
print(paste("how many OTUs on average in each soil layer"))
## [1] "how many OTUs on average in each soil layer"
print(x)
## # A tibble: 5 × 3
##   depth   OTUs_mean OTUs_se
##   <chr>       <dbl>   <dbl>
## 1 0...10      2184.   183. 
## 2 10...20     2495.   134. 
## 3 20...30     1900.   187. 
## 4 30...40      607.    86.0
## 5 40...        215.    39.9
y <- meta %>%
  dplyr::group_by(depth) %>%
  dplyr::summarise(reads_mean = mean(reads, na.rm = TRUE), reads_se = (sd(reads, na.rm = TRUE)/sqrt(length((reads)))))
print(paste("how many reads on average in each soil layer"))
## [1] "how many reads on average in each soil layer"
print(y)
## # A tibble: 5 × 3
##   depth   reads_mean reads_se
##   <chr>        <dbl>    <dbl>
## 1 0...10      80957.    4871.
## 2 10...20     95172.    6279.
## 3 20...30     97826.    8155.
## 4 30...40    102635.   15860.
## 5 40...       35534.    8482.
xy <- left_join(x, y, by="depth")

# save

#write.csv2(xy, file = "OTUs_and_reads_in_depths.csv", row.names = FALSE)

library("metagMisc")

ps_x <- phyloseq_average(
  ps,
  avg_type = "arithmetic",
  group = "depth",
  drop_group_zero = FALSE,
  verbose = FALSE,
  progress = NULL
)

ps_x
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 5 samples ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# 20610 taxa and 5 samples

OTU <- as.data.frame(otu_table(ps_x))
OTU <- t(OTU)


OTU <- as.matrix(OTU)

#convert an abundance matrix to a presence-absence matrix. (make any number greater than 0 into a 1)
OTU[OTU > 0] <- 1 #converts from abundance to P/A
OTU <- as.data.frame(OTU)
# calculate number of different otus in a sample
OTU$OTUs <- rowSums(OTU)

OTU$depth <- rownames(OTU)

OTU <- OTU[, 20611:20612]
print(paste("how many OTUs in total in each soil layer"))
## [1] "how many OTUs in total in each soil layer"
print(OTU)
##          OTUs   depth
## 0...10  14737  0...10
## 10...20 16268 10...20
## 20...30 14763 20...30
## 30...40  5367 30...40
## 40...    2563   40...

Test OTU richness between layers

library(car)
# Using leveneTest()
result = leveneTest(OTUs ~ depth, meta)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   4  16.055 8.907e-11 ***
##       135                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# not homogenous


# perform the Kruskal test
kruskal.test(OTUs ~ depth, data = meta)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  OTUs by depth
## Kruskal-Wallis chi-squared = 91.593, df = 4, p-value < 2.2e-16
pairwise.wilcox.test(meta$OTUs, meta$depth,
                     p.adjust.method = "BH")
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  meta$OTUs and meta$depth 
## 
##         0...10  10...20 20...30 30...40
## 10...20 0.30647 -       -       -      
## 20...30 0.33226 0.02782 -       -      
## 30...40 2.4e-09 8.9e-10 4.1e-07 -      
## 40...   8.9e-10 8.9e-10 1.7e-09 0.00065
## 
## P value adjustment method: BH

RESULTS STEP 10: Richness figures and statistical testing

library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)


setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 11 taxonomic ranks ]
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)

1. Calculate FUNGuild richnesses

1.1. Saprotroph

x_sub <- subset_taxa(ps_FG, trophicMode %in% c("Saprotroph"))
x_sub
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4842 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 4842 taxa by 11 taxonomic ranks ]
# 4842 taxa and 140 samples

# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")

# get the data into data frame
richness_df <- richness$data

# the observed richness is in the column "value", lets change this to saprotroph_richness
colnames(richness_df)[30] <- "saprotroph_richness"
# and lets remove the "variable" and "se" columns
richness_df <- subset(richness_df, select = -variable)
richness_df <- subset(richness_df, select = -se)

FG_richness <- richness_df

1.2. Symbiotroph

x_sub <- subset_taxa(ps_FG, trophicMode %in% c("Symbiotroph"))
x_sub
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 784 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 784 taxa by 11 taxonomic ranks ]
# 784 taxa and 140 samples



# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")

# get the data
richness_df <- richness$data

# the observed richness is in the column "value", lets change this to symbiotroph_richness
colnames(richness_df)[30] <- "symbiotroph_richness"
# and lets remove extra columns
richness_df <- richness_df[, c(1, 30)]

# combine
FG_richness <- dplyr::left_join(FG_richness, richness_df, by = "sampleID")
FG_richness <- subset(FG_richness, select = -samples)

# sampleID into rownames
rownames(FG_richness) <- FG_richness[,1]

1.3. Pathotroph

x_sub <- subset_taxa(ps_FG, trophicMode %in% c("Pathotroph"))
x_sub
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1500 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 27 sample variables ]
## tax_table()   Taxonomy Table:    [ 1500 taxa by 11 taxonomic ranks ]
# 1500 taxa and 140 samples



# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")

# get the data
richness_df <- richness$data

# the observed richness is in the column "value", lets change this to pathotroph_richness
colnames(richness_df)[30] <- "pathotroph_richness"
# and lets remove extra columns
richness_df <- richness_df[, c(1, 30)]

# combine
FG_richness <- dplyr::left_join(FG_richness, richness_df, by = "sampleID")


# sampleID into rownames
rownames(FG_richness) <- FG_richness[,1]

2. Calculate AMF (Glomeromycota) Richness

Note! AMF richness calculated from FUNGuild is exactly the same (not shown here)!

x_sub <- subset_taxa(ps, phylum %in% c("Glomeromycota"))
x_sub
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 263 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 28 sample variables ]
## tax_table()   Taxonomy Table:    [ 263 taxa by 7 taxonomic ranks ]
# 263 taxa and 140 samples



# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")

# get the data
richness_df <- richness$data

# the observed richness is in the column "value", lets change this to AMF_richness
colnames(richness_df)[31] <- "AMF_richness"
# and lets remove extra columns
richness_df <- richness_df[, c(1, 31)]

# combine
FG_richness <- dplyr::left_join(FG_richness, richness_df, by = "sampleID")

# sampleID into rownames
rownames(FG_richness) <- FG_richness[,1]

# I actually want the meta data from the ps rather than the ps_FG, so leyt's remove extra columns

FG_richness <- FG_richness[, c(1, 28:31)]

meta <- dplyr::left_join(meta, FG_richness, by = "sampleID")

# sampleID into rownames
rownames(meta) <- meta[,1]

3. Save new richness measures in meta data

sample_data(ps) <- sample_data(meta)

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

save(ps, file = 'ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]

4. Richness plots

Note! The CLD letters that will be added to the fiugures are based on statistical test results done at step 6.

4.1. Fungal richness

# create your own color palette for sample types
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c")

# OTU richness
OTU_rich <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(observed, na.rm = TRUE), se = (sd(observed, na.rm = TRUE)/sqrt(length((observed))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Fungal richness") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
print(OTU_rich)

Change to thousands of reads (x1000)

rich_k <- meta %>% dplyr::mutate(richness_k = observed/1000 ) %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(richness_k, na.rm = TRUE), se = (sd(richness_k, na.rm = TRUE)/sqrt(length((richness_k))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Fungal richness \n (x1000)") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

rich_k

4.1.1 change labels

rich_k <- meta %>% dplyr::mutate(richness_k = observed/1000 ) %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(richness_k, na.rm = TRUE), se = (sd(richness_k, na.rm = TRUE)/sqrt(length((richness_k))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Fungal richness \n (x1000)") +
  labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

rich_k <- rich_k + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm"))

rich_k <- rich_k + coord_flip()
rich_k

rich_k <- meta %>% dplyr::mutate(richness_k = observed/1000 ) %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(richness_k, na.rm = TRUE), se = (sd(richness_k, na.rm = TRUE)/sqrt(length((richness_k))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Fungal richness \n (x1000)") +
  labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

rich_k <- rich_k + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm"))

rich_k <- rich_k +
  annotate("text", x=1.5, y=1.5, label= "b", col="#1167b1", size=3) +
  annotate("text", x=1.5, y=1.1, label= "a", col="#fbc02d", size=3) +
  annotate("text", x=1.5, y=3, label= "c", col="#8a8a8a", size=3) +
  annotate("text", x=1.5, y=2.6, label= "c", col="#b71c1c", size=3) +
  annotate("text", x=11, y=1.4, label= "a", col="#1167b1", size=3) +
  annotate("text", x=11, y=2.0, label= "ab", col="#fbc02d", size=3) +
  annotate("text", x=11, y=2.55, label= "ab", col="#8a8a8a", size=3) +
  annotate("text", x=11, y=3.0, label= "b", col="#b71c1c", size=3) +
  annotate("text", x=25, y=0.2, label= "(ns)", size=3) +
  annotate("text", x=35, y=1.2, label= "(ns)", size=3) +
  annotate("text", x=59, y=0.7, label= "ab", col="#1167b1", size=3) +
  annotate("text", x=59, y=1.2, label= "ab", col="#fbc02d", size=3) +
  annotate("text", x=59, y=1.45, label= "b", col="#8a8a8a", size=3) +
  annotate("text", x=59, y=0.95, label= "a", col="#b71c1c", size=3)

rich_k <- rich_k + coord_flip()
rich_k

4.2. AMF richness

gm_rich <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(AMF_richness, na.rm = TRUE), se = (sd(AMF_richness, na.rm = TRUE)/sqrt(length((AMF_richness))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="AMF richness \n") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

gm_rich

4.2.1 change labels

gm_rich <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(AMF_richness, na.rm = TRUE), se = (sd(AMF_richness, na.rm = TRUE)/sqrt(length((AMF_richness))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="AMF richness \n") +
  labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

gm_rich <- gm_rich + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm"))


gm_rich <- gm_rich +
  annotate("text", x=1.5, y=7, label= "a", col="#1167b1", size=3) +
  annotate("text", x=1.5, y=12, label= "ab", col="#fbc02d", size=3) +
  annotate("text", x=1.5, y=17, label= "ab", col="#8a8a8a", size=3) +
  annotate("text", x=1.5, y=22, label= "b", col="#b71c1c", size=3) +
  annotate("text", x=11, y=11, label= "a", col="#1167b1", size=3) +
  annotate("text", x=11, y=30, label= "b", col="#fbc02d", size=3) +
  annotate("text", x=11, y=23, label= "ab", col="#8a8a8a", size=3) +
  annotate("text", x=11, y=27, label= "b", col="#b71c1c", size=3) +
  annotate("text", x=21, y=9, label= "a", col="#1167b1", size=3) +
  annotate("text", x=21, y=37, label= "b", col="#fbc02d", size=3) +
  annotate("text", x=21, y=27, label= "ab", col="#8a8a8a", size=3) +
  annotate("text", x=21, y=20, label= "a", col="#b71c1c", size=3) +
  annotate("text", x=31, y=8, label= "ab", col="#1167b1", size=3) +
  annotate("text", x=31, y=34, label= "b", col="#fbc02d", size=3) +
  annotate("text", x=31, y=17, label= "ab", col="#8a8a8a", size=3) +
  annotate("text", x=31, y=4, label= "a", col="#b71c1c", size=3) +
  annotate("text", x=59, y=13, label= "(ns)", size=3)

gm_rich <- gm_rich + coord_flip()
gm_rich

4.3. Saprotroph richness

Saprotroph_rich <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(saprotroph_richness, na.rm = TRUE), se = (sd(saprotroph_richness, na.rm = TRUE)/sqrt(length((saprotroph_richness))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Saprotroph richness \n") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)


Saprotroph_rich

4.3.1 change labels

Saprotroph_rich <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(saprotroph_richness, na.rm = TRUE), se = (sd(saprotroph_richness, na.rm = TRUE)/sqrt(length((saprotroph_richness))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Saprotroph richness \n") +
  labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

Saprotroph_rich <- Saprotroph_rich + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm"))

Saprotroph_rich <- Saprotroph_rich +
  annotate("text", x=1.5, y=450, label= "a", col="#1167b1", size=3) +
  annotate("text", x=1.5, y=550, label= "a", col="#fbc02d", size=3) +
  annotate("text", x=1.5, y=830, label= "b", col="#8a8a8a", size=3) +
  annotate("text", x=1.5, y=690, label= "ab", col="#b71c1c", size=3) +
  annotate("text", x=11, y=490, label= "a", col="#1167b1", size=3) +
  annotate("text", x=11, y=900, label= "b", col="#fbc02d", size=3) +
  annotate("text", x=11, y=680, label= "ab", col="#8a8a8a", size=3) +
  annotate("text", x=11, y=800, label= "ab", col="#b71c1c", size=3) +
  annotate("text", x=21, y=220, label= "a", col="#1167b1", size=3) +
  annotate("text", x=21, y=770, label= "b", col="#fbc02d", size=3) +
  annotate("text", x=21, y=600, label= "ab", col="#8a8a8a", size=3) +
  annotate("text", x=21, y=450, label= "ab", col="#b71c1c", size=3) +
  annotate("text", x=35, y=450, label= "(ns)", size=3) +
  annotate("text", x=59, y=200, label= "(ns)", size=3)

Saprotroph_rich <- Saprotroph_rich + coord_flip()
Saprotroph_rich

4.4. Symbiotroph richness

symb_rich <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(symbiotroph_richness, na.rm = TRUE), se = (sd(symbiotroph_richness, na.rm = TRUE)/sqrt(length((symbiotroph_richness))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Symbiotroph richness \n") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)


symb_rich

4.4.1 change labels

symb_rich <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(symbiotroph_richness, na.rm = TRUE), se = (sd(symbiotroph_richness, na.rm = TRUE)/sqrt(length((symbiotroph_richness))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Symbiotroph richness \n") +
  labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

symb_rich <- symb_rich + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm"))

symb_rich <- symb_rich + coord_flip()
symb_rich

4.5. Pathotroph richness

path_rich <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(pathotroph_richness, na.rm = TRUE), se = (sd(pathotroph_richness, na.rm = TRUE)/sqrt(length((pathotroph_richness))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Pathotroph richness \n") +
  labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)


path_rich

4.5.1 change labels

path_rich <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(pathotroph_richness, na.rm = TRUE), se = (sd(pathotroph_richness, na.rm = TRUE)/sqrt(length((pathotroph_richness))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=3, position=position_dodge(1.2)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text = element_text(size=16),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Pathotroph richness \n") +
  labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)

path_rich <- path_rich + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm"))

path_rich <- path_rich +
  annotate("text", x=1.5, y=90, label= "a", col="#1167b1", size=3) +
  annotate("text", x=1.5, y=60, label= "a", col="#fbc02d", size=3) +
  annotate("text", x=1.5, y=310, label= "b", col="#8a8a8a", size=3) +
  annotate("text", x=1.5, y=330, label= "b", col="#b71c1c", size=3) +
  annotate("text", x=11, y=56, label= "a", col="#1167b1", size=3) +
  annotate("text", x=11, y=118, label= "a", col="#fbc02d", size=3) +
  annotate("text", x=11, y=275, label= "b", col="#8a8a8a", size=3) +
  annotate("text", x=11, y=331, label= "b", col="#b71c1c", size=3) +
  annotate("text", x=25, y=250, label= "(ns)", size=3) +
  annotate("text", x=35, y=70, label= "(ns)", size=3) +
  annotate("text", x=59, y=50, label= "(ns)", size=3)

path_rich <- path_rich + coord_flip()
path_rich

5. Combine selected figures

figure <- ggarrange(rich_k, gm_rich + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), Saprotroph_rich + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), path_rich + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"),
                    labels = c("A", "B", "C", "D"),
                    ncol = 4, nrow = 1, common.legend = TRUE, legend = "right", widths = c(1.25, 0.9, 0.9, 0.9))

figure

6. Test richnes between management types in all soil layers

library("multcomp")

Change depth and richness measure accordingly

Test these: - observed - AMF_richness - saprotroph_richness - pathotroph_richness

library(car)

means_and_ses <- list()

meta$depth <- as.factor(meta$depth)
rich <-c("observed", "AMF_richness", "saprotroph_richness", "pathotroph_richness")

for (i in rich) {
  for (j in levels(meta$depth)) {
    df <- filter(meta, depth==j)
    
     print(i)
     print(j)
     # Levene's test for homogeneity of variance
    result <- leveneTest(df[[i]] ~ sample_type, data = df)
    print(result)
     
    # Kruskal-Wallis test
    k <- kruskal.test(df[[i]] ~ sample_type, data = df)
    print(k)
    
    # Pairwise Wilcoxon test
    w <- pairwise.wilcox.test(df[[i]], df$sample_type, p.adjust.method = "BH")
    print(w)
    
     # ANOVA
    res.aov <- aov(df[[i]] ~ sample_type, data = df)
    aov_summary <- summary(res.aov)
    print(aov_summary)
    
    # Tukey's HSD test
    tukey <- TukeyHSD(res.aov)
    print(tukey)
    
    # Group by and summarize
    mean_and_se <- df %>%
      group_by(sample_type) %>%
      summarise(mean = mean(!!sym(i), na.rm = TRUE), se = sd(!!sym(i), na.rm = TRUE) / sqrt(n()))
  
    # Store the result in the list with a descriptive name
    result_name <- paste("depth", j, "diversity", i, sep = "_")
    means_and_ses[[result_name]] <- mean_and_se
    
  }
}
## [1] "observed"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  1.0589 0.3849
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 17.971, df = 3, p-value = 0.0004458
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest  meadow  organic
## meadow       0.59636 -       -      
## organic      0.03636 0.00093 -      
## conventional 0.03636 0.00093 0.67297
## 
## P value adjustment method: BH 
##             Df   Sum Sq Mean Sq F value   Pr(>F)    
## sample_type  3 17009022 5669674   16.24 5.61e-06 ***
## Residuals   24  8379531  349147                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr       upr     p adj
## meadow-forest        -345.2917 -1448.8240  758.2407 0.8235682
## organic-forest       1417.2083   313.6760 2520.7407 0.0084047
## conventional-forest  1246.1111   159.4276 2332.7947 0.0204125
## organic-meadow       1762.5000   947.4873 2577.5127 0.0000209
## conventional-meadow  1591.4028   799.3528 2383.4527 0.0000593
## conventional-organic -171.0972  -963.1472  620.9527 0.9323217
## 
## [1] "observed"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  1.6778 0.1983
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 9.1193, df = 3, p-value = 0.02775
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.200  -      -      
## organic      0.068  0.459  -      
## conventional 0.068  0.068  0.541  
## 
## P value adjustment method: BH 
##             Df  Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3 4612134 1537378   4.074 0.0179 *
## Residuals   24 9056485  377354                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff       lwr      upr     p adj
## meadow-forest         611.9167 -535.3253 1759.159 0.4695196
## organic-forest       1043.5417 -103.7003 2190.784 0.0838551
## conventional-forest  1288.4444  158.7186 2418.170 0.0212276
## organic-meadow        431.6250 -415.6694 1278.919 0.5083506
## conventional-meadow   676.5278 -146.8943 1499.950 0.1343587
## conventional-organic  244.9028 -578.5193 1068.325 0.8442175
## 
## [1] "observed"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  2.0857 0.1287
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 4.6562, df = 3, p-value = 0.1988
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.15   -      -      
## organic      0.25   0.96   -      
## conventional 0.42   0.96   0.72   
## 
## P value adjustment method: BH 
##             Df   Sum Sq Mean Sq F value Pr(>F)
## sample_type  3  4742212 1580737   1.755  0.183
## Residuals   24 21619303  900804               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr       upr     p adj
## meadow-forest        1334.2083  -438.3317 3106.7484 0.1894002
## organic-forest       1381.5833  -390.9567 3154.1234 0.1662604
## conventional-forest  1032.6667  -712.8101 2778.1435 0.3804286
## organic-meadow         47.3750 -1261.7326 1356.4826 0.9996332
## conventional-meadow  -301.5417 -1573.7656  970.6822 0.9131616
## conventional-organic -348.9167 -1621.1406  923.3072 0.8729624
## 
## [1] "observed"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  3  2.6803 0.06959 .
##       24                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 3.7523, df = 3, p-value = 0.2895
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.46   -      -      
## organic      0.46   0.46   -      
## conventional 0.46   0.46   0.67   
## 
## P value adjustment method: BH 
##             Df  Sum Sq Mean Sq F value Pr(>F)
## sample_type  3  986615  328872   1.716   0.19
## Residuals   24 4599845  191660               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff       lwr       upr     p adj
## meadow-forest         624.41667 -193.1945 1442.0279 0.1795937
## organic-forest        289.91667 -527.6945 1107.5279 0.7630092
## conventional-forest   342.44444 -462.6834 1147.5723 0.6489479
## organic-meadow       -334.50000 -938.3459  269.3459 0.4370977
## conventional-meadow  -281.97222 -868.8049  304.8605 0.5563050
## conventional-organic   52.52778 -534.3049  639.3605 0.9945665
## 
## [1] "observed"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  1.3401 0.2848
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 10.863, df = 3, p-value = 0.01249
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.153  -      -      
## organic      0.056  0.056  -      
## conventional 0.175  0.963  0.056  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3 407159  135720   4.089 0.0177 *
## Residuals   24 796553   33190                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff         lwr       upr     p adj
## meadow-forest         114.75000 -225.487851 454.98785 0.7889685
## organic-forest        337.87500   -2.362851 678.11285 0.0520810
## conventional-forest    77.11111 -257.931964 412.15419 0.9197120
## organic-meadow        223.12500  -28.157306 474.40731 0.0944700
## conventional-meadow   -37.63889 -281.841396 206.56362 0.9736063
## conventional-organic -260.76389 -504.966396 -16.56138 0.0333018
## 
## [1] "AMF_richness"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  0.4636 0.7103
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 8.1971, df = 3, p-value = 0.04211
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.33   -      -      
## organic      0.13   0.56   -      
## conventional 0.13   0.13   0.15   
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3  664.5   221.5   3.238 0.0398 *
## Residuals   24 1641.6    68.4                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr      upr     p adj
## meadow-forest         6.708333 -8.7373996 22.15407 0.6339029
## organic-forest        9.083333 -6.3623996 24.52907 0.3855925
## conventional-forest  15.555556  0.3456492 30.76546 0.0437049
## organic-meadow        2.375000 -9.0324298 13.78243 0.9387885
## conventional-meadow   8.847222 -2.2388068 19.93325 0.1515032
## conventional-organic  6.472222 -4.6138068 17.55825 0.3918515
## 
## [1] "AMF_richness"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  0.1141  0.951
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 7.4387, df = 3, p-value = 0.05916
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.076  -      -      
## organic      0.170  0.442  -      
## conventional 0.076  0.699  0.433  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3    894  298.00   4.384 0.0135 *
## Residuals   24   1631   67.97                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff        lwr      upr     p adj
## meadow-forest        18.5833333   3.186290 33.98038 0.0139049
## organic-forest       12.4583333  -2.938710 27.85538 0.1432253
## conventional-forest  17.7777778   2.615818 32.93974 0.0173327
## organic-meadow       -6.1250000 -17.496470  5.24647 0.4611598
## conventional-meadow  -0.8055556 -11.856638 10.24553 0.9970402
## conventional-organic  5.3194444  -5.731638 16.37053 0.5548799
## 
## [1] "AMF_richness"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  2.1721 0.1176
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 10.239, df = 3, p-value = 0.01664
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.056  -      -      
## organic      0.056  0.154  -      
## conventional 0.404  0.068  0.402  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## sample_type  3   2273   757.8   4.859 0.00883 **
## Residuals   24   3743   156.0                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff        lwr        upr     p adj
## meadow-forest         28.708333   5.385511 52.0311555 0.0119094
## organic-forest        18.083333  -5.239489 41.4061555 0.1696484
## conventional-forest   11.222222 -11.744505 34.1889499 0.5427300
## organic-meadow       -10.625000 -27.850046  6.6000457 0.3447892
## conventional-meadow  -17.486111 -34.225847 -0.7463756 0.0383380
## conventional-organic  -6.861111 -23.600847  9.8786244 0.6746486
## 
## [1] "AMF_richness"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  2.2858 0.1044
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 9.515, df = 3, p-value = 0.02317
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.073  -      -      
## organic      0.643  0.228  -      
## conventional 0.926  0.023  0.643  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## sample_type  3   3828  1275.9   5.884 0.00369 **
## Residuals   24   5204   216.8                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff         lwr       upr     p adj
## meadow-forest         26.875000  -0.6265144 54.376514 0.0571204
## organic-forest        10.375000 -17.1265144 37.876514 0.7276472
## conventional-forest   -1.777778 -28.8593971 25.303842 0.9978313
## organic-meadow       -16.500000 -36.8112145  3.811214 0.1408901
## conventional-meadow  -28.652778 -48.3917302 -8.913825 0.0027413
## conventional-organic -12.152778 -31.8917302  7.586175 0.3463849
## 
## [1] "AMF_richness"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3   1.591 0.2176
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 4.4541, df = 3, p-value = 0.2164
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.36   -      -      
## organic      0.36   0.83   -      
## conventional 0.51   0.36   0.36   
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)
## sample_type  3  267.9   89.31   1.488  0.243
## Residuals   24 1440.9   60.04               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff        lwr       upr     p adj
## meadow-forest         6.0416667  -8.429273 20.512607 0.6619918
## organic-forest        7.4166667  -7.054273 21.887607 0.5032636
## conventional-forest   0.7777778 -13.472219 15.027774 0.9987481
## organic-meadow        1.3750000  -9.312497 12.062497 0.9842975
## conventional-meadow  -5.2638889 -15.650269  5.122492 0.5126347
## conventional-organic -6.6388889 -17.025269  3.747492 0.3148737
## 
## [1] "saprotroph_richness"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3   1.236 0.3185
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 10.335, df = 3, p-value = 0.01592
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.921  -      -      
## organic      0.170  0.046  -      
## conventional 0.283  0.046  0.283  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## sample_type  3 552978  184326   5.106 0.00711 **
## Residuals   24 866367   36099                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff          lwr      upr     p adj
## meadow-forest          22.66667 -332.1680785 377.5014 0.9980002
## organic-forest        354.66667   -0.1680785 709.5014 0.0501395
## conventional-forest   219.11111 -130.3059916 568.5282 0.3308818
## organic-meadow        332.00000   69.9371730 594.0628 0.0094204
## conventional-meadow   196.44444  -58.2348452 451.1237 0.1730235
## conventional-organic -135.55556 -390.2348452 119.1237 0.4713122
## 
## [1] "saprotroph_richness"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  1.3935 0.2689
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 8.4841, df = 3, p-value = 0.037
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.073  -      -      
## organic      0.170  0.193  -      
## conventional 0.073  0.185  0.888  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3 368992  122997    3.91 0.0209 *
## Residuals   24 754986   31458                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff        lwr       upr     p adj
## meadow-forest         405.16667   73.92528 736.40806 0.0125214
## organic-forest        249.79167  -81.44972 581.03306 0.1881316
## conventional-forest   260.11111  -66.07286 586.29508 0.1519732
## organic-meadow       -155.37500 -400.01298  89.26298 0.3201964
## conventional-meadow  -145.05556 -382.80094  92.68983 0.3540753
## conventional-organic   10.31944 -227.42594 248.06483 0.9993682
## 
## [1] "saprotroph_richness"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  1.4484 0.2535
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 9.2572, df = 3, p-value = 0.02606
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.036  -      -      
## organic      0.267  0.292  -      
## conventional 0.447  0.036  0.541  
## 
## P value adjustment method: BH 
##             Df  Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3  784367  261456   3.683 0.0259 *
## Residuals   24 1703653   70986                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr        upr     p adj
## meadow-forest         542.7083   45.12498 1040.29168 0.0289462
## organic-forest        358.2083 -139.37502  855.79168 0.2210832
## conventional-forest   230.3333 -259.65288  720.31955 0.5737676
## organic-meadow       -184.5000 -551.98966  182.98966 0.5204628
## conventional-meadow  -312.3750 -669.51075   44.76075 0.1015303
## conventional-organic -127.8750 -485.01075  229.26075 0.7576950
## 
## [1] "saprotroph_richness"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3   1.706 0.1924
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 6.5626, df = 3, p-value = 0.08723
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.388  -      -      
## organic      0.864  0.249  -      
## conventional 0.864  0.091  0.482  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3 192644   64215   3.255 0.0392 *
## Residuals   24 473414   19726                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                             diff        lwr        upr     p adj
## meadow-forest         186.708333  -75.59003 449.006696 0.2294520
## organic-forest          2.083333 -260.21503 264.381696 0.9999961
## conventional-forest     5.111111 -253.18246 263.404684 0.9999398
## organic-meadow       -184.625000 -378.34518   9.095179 0.0656538
## conventional-meadow  -181.597222 -369.85940   6.664952 0.0614447
## conventional-organic    3.027778 -185.23440 191.289952 0.9999677
## 
## [1] "saprotroph_richness"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  0.8324 0.4892
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 3.6785, df = 3, p-value = 0.2983
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.39   -      -      
## organic      0.39   0.76   -      
## conventional 0.39   0.47   0.81   
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)
## sample_type  3   9406    3135   0.892  0.459
## Residuals   24  84343    3514               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr       upr     p adj
## meadow-forest         52.66667  -58.04632 163.37965 0.5643329
## organic-forest        47.29167  -63.42132 158.00465 0.6458956
## conventional-forest   20.00000  -89.02261 129.02261 0.9568518
## organic-meadow        -5.37500  -87.14196  76.39196 0.9978224
## conventional-meadow  -32.66667 -112.12987  46.79653 0.6726275
## conventional-organic -27.29167 -106.75487  52.17153 0.7798186
## 
## [1] "pathotroph_richness"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  3  3.5826 0.02853 *
##       24                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 19.378, df = 3, p-value = 0.0002283
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest  meadow  organic
## meadow       0.93091 -       -      
## organic      0.01818 0.00047 -      
## conventional 0.01818 0.00047 1.00000
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## sample_type  3 404412  134804   25.49 1.23e-07 ***
## Residuals   24 126933    5289                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                            diff        lwr      upr     p adj
## meadow-forest        -11.041667 -146.86103 124.7777 0.9959110
## organic-forest       234.833333   99.01397 370.6527 0.0004079
## conventional-forest  240.666667  106.92100 374.4123 0.0002508
## organic-meadow       245.875000  145.56575 346.1843 0.0000031
## conventional-meadow  251.708333  154.22526 349.1914 0.0000013
## conventional-organic   5.833333  -91.64974 103.3164 0.9983535
## 
## [1] "pathotroph_richness"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  2.2339 0.1102
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 21.058, df = 3, p-value = 0.0001024
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest  meadow  organic
## meadow       0.02909 -       -      
## organic      0.01818 0.00047 -      
## conventional 0.01818 0.00047 0.13879
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## sample_type  3 298602   99534   29.77 2.95e-08 ***
## Residuals   24  80239    3343                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff       lwr      upr     p adj
## meadow-forest         61.62500 -46.36138 169.6114 0.4114727
## organic-forest       219.25000 111.26362 327.2364 0.0000513
## conventional-forest  274.66667 168.32903 381.0043 0.0000013
## organic-meadow       157.62500  77.87177 237.3782 0.0000742
## conventional-meadow  213.04167 135.53546 290.5479 0.0000005
## conventional-organic  55.41667 -22.08954 132.9229 0.2261312
## 
## [1] "pathotroph_richness"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  2.2613 0.1071
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 8.05, df = 3, p-value = 0.04499
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.073  -      -      
## organic      0.073  0.157  -      
## conventional 0.104  0.386  0.888  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample_type  3  93092   31031   2.547 0.0797 .
## Residuals   24 292356   12182                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr      upr     p adj
## meadow-forest         75.58333 -130.54194 281.7086 0.7443125
## organic-forest       171.70833  -34.41694 377.8336 0.1266336
## conventional-forest  157.66667  -45.31147 360.6448 0.1684675
## organic-meadow        96.12500  -56.10861 248.3586 0.3250720
## conventional-meadow   82.08333  -65.86114 230.0278 0.4357382
## conventional-organic -14.04167 -161.98614 133.9028 0.9935429
## 
## [1] "pathotroph_richness"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  1.1511 0.3488
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 5.8552, df = 3, p-value = 0.1189
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.18   -      -      
## organic      0.49   0.75   -      
## conventional 0.18   0.18   0.47   
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)
## sample_type  3   1589   529.8    1.81  0.172
## Residuals   24   7024   292.7               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr      upr     p adj
## meadow-forest        11.208333 -20.742112 43.15878 0.7687688
## organic-forest       15.958333 -15.992112 47.90878 0.5247314
## conventional-forest  24.444444  -7.018179 55.90707 0.1683269
## organic-meadow        4.750000 -18.846968 28.34697 0.9441916
## conventional-meadow  13.236111  -9.696019 36.16824 0.4016916
## conventional-organic  8.486111 -14.446019 31.41824 0.7390336
## 
## [1] "pathotroph_richness"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3    1.33 0.2879
##       24               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 7.1979, df = 3, p-value = 0.06585
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df[[i]] and df$sample_type 
## 
##              forest meadow organic
## meadow       0.083  -      -      
## organic      0.083  0.665  -      
## conventional 0.083  0.727  0.665  
## 
## P value adjustment method: BH 
##             Df Sum Sq Mean Sq F value Pr(>F)
## sample_type  3  114.1   38.03   1.368  0.276
## Residuals   24  666.9   27.79               
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
## 
## $sample_type
##                           diff        lwr       upr     p adj
## meadow-forest         4.166667  -5.678021 14.011355 0.6524360
## organic-forest        6.166667  -3.678021 16.011355 0.3317956
## conventional-forest   2.111111  -7.583267 11.805490 0.9308262
## organic-meadow        2.000000  -5.270784  9.270784 0.8719962
## conventional-meadow  -2.055556  -9.121487  5.010376 0.8525672
## conventional-organic -4.055556 -11.121487  3.010376 0.4065242
# Combine list into a single dataframe
combined_df <- bind_rows(lapply(names(means_and_ses), function(name) {
  df <- means_and_ses[[name]]
  df$result_name <- name
  return(df)
}), .id = "id")

# View the combined dataframe
print(combined_df)
## # A tibble: 80 × 5
##    id    sample_type   mean    se result_name                     
##    <chr> <fct>        <dbl> <dbl> <chr>                           
##  1 1     forest       1478. 438.  depth_0...10_diversity_observed 
##  2 1     meadow       1132. 158.  depth_0...10_diversity_observed 
##  3 1     organic      2895. 242.  depth_0...10_diversity_observed 
##  4 1     conventional 2724. 188.  depth_0...10_diversity_observed 
##  5 2     forest       1608.  93.0 depth_10...20_diversity_observed
##  6 2     meadow       2220. 219.  depth_10...20_diversity_observed
##  7 2     organic      2652. 265.  depth_10...20_diversity_observed
##  8 2     conventional 2897. 181.  depth_10...20_diversity_observed
##  9 3     forest        792. 249.  depth_20...30_diversity_observed
## 10 3     meadow       2126. 226.  depth_20...30_diversity_observed
## # ℹ 70 more rows
# save the mean and se values

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

#write.csv2(combined_df, file = "Richness_mean_and_ses.csv")

RESULTS STEP 11: Test between management the 5 most abundant taxa

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')


load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_RA <- transform(ps, "compositional")

meta <- meta(ps)

1. Five most abundant phyla

ps_RA_x <- aggregate_rare(ps_RA, level = "phylum", detection = 0, prevalence = 0)
ps_RA_x              
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 14 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 14 taxa by 1 taxonomic ranks ]
# 14 taxa and 140 samples


#create data table
x_df <-  psmelt(ps_RA_x)

x_df$year <- "2019"

x <- x_df %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU                  mean      se
##   <chr> <chr>               <dbl>   <dbl>
## 1 2019  Ascomycota        0.661   0.0163 
## 2 2019  Basidiomycota     0.235   0.0159 
## 3 2019  Mortierellomycota 0.0759  0.00873
## 4 2019  Glomeromycota     0.0240  0.00613
## 5 2019  Rozellomycota     0.00228 0.00109

1.2. Test for each 5 phyla

library(car)

taxa <- y$OTU

for (i in taxa) {
 df <- filter(x_df, OTU == i)
 print(i)
 # Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)

# perform the Kruskal test
k <- kruskal.test(Abundance ~ sample_type, data = df)
print(k)

w <- pairwise.wilcox.test(df$Abundance, df$sample_type,
                     p.adjust.method = "BH")
print(w)
}
## [1] "Ascomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  1.5105 0.2146
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 27.535, df = 3, p-value = 4.546e-06
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow organic
## meadow       5.9e-05 -      -      
## organic      2.2e-07 0.42   -      
## conventional 1.4e-07 0.35   0.70   
## 
## P value adjustment method: BH 
## [1] "Basidiomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  1.2681 0.2879
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 26.818, df = 3, p-value = 6.428e-06
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow organic
## meadow       1.1e-05 -      -      
## organic      1.1e-05 0.52   -      
## conventional 2.2e-07 0.48   0.21   
## 
## P value adjustment method: BH 
## [1] "Mortierellomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  1.6242 0.1867
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 7.126, df = 3, p-value = 0.06799
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.902  -      -      
## organic      0.902  0.902  -      
## conventional 0.180  0.180  0.076  
## 
## P value adjustment method: BH 
## [1] "Glomeromycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  1.0182 0.3867
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 5.8342, df = 3, p-value = 0.12
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.14   -      -      
## organic      0.27   0.36   -      
## conventional 0.30   0.27   0.91   
## 
## P value adjustment method: BH 
## [1] "Rozellomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value   Pr(>F)   
## group   3  4.5426 0.004562 **
##       136                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 11.375, df = 3, p-value = 0.00986
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.120  -      -      
## organic      0.058  0.120  -      
## conventional 0.436  0.120  0.041  
## 
## P value adjustment method: BH
x <- x_df %>%
  filter(OTU %in% taxa) %>% group_by(OTU, sample_type) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 20 × 4
## # Groups:   OTU [5]
##    OTU               sample_type      mean        se
##    <chr>             <fct>           <dbl>     <dbl>
##  1 Ascomycota        forest       0.402    0.0486   
##  2 Ascomycota        meadow       0.670    0.0293   
##  3 Ascomycota        organic      0.692    0.0264   
##  4 Ascomycota        conventional 0.711    0.0231   
##  5 Basidiomycota     forest       0.533    0.0555   
##  6 Basidiomycota     meadow       0.210    0.0261   
##  7 Basidiomycota     organic      0.220    0.0247   
##  8 Basidiomycota     conventional 0.170    0.0191   
##  9 Glomeromycota     forest       0.00368  0.00177  
## 10 Glomeromycota     meadow       0.0325   0.0121   
## 11 Glomeromycota     organic      0.0333   0.0158   
## 12 Glomeromycota     conventional 0.0149   0.00690  
## 13 Mortierellomycota forest       0.0470   0.0158   
## 14 Mortierellomycota meadow       0.0860   0.0195   
## 15 Mortierellomycota organic      0.0522   0.0120   
## 16 Mortierellomycota conventional 0.0975   0.0167   
## 17 Rozellomycota     forest       0.0131   0.00974  
## 18 Rozellomycota     meadow       0.000315 0.0000708
## 19 Rozellomycota     organic      0.000163 0.0000405
## 20 Rozellomycota     conventional 0.00228  0.000714

1.3. Test phyla in soil layers

1.3.1 meadow, organic and conevntional only

1.3.1.1. Five most abundant phyla

x_df_nf <- subset(x_df, sample_type!="forest")

x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"

x <- x_df_nf %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU                  mean       se
##   <chr> <chr>               <dbl>    <dbl>
## 1 2019  Ascomycota        0.692   0.0151  
## 2 2019  Basidiomycota     0.199   0.0134  
## 3 2019  Mortierellomycota 0.0793  0.00956 
## 4 2019  Glomeromycota     0.0264  0.00683 
## 5 2019  Chytridiomycota   0.00209 0.000362
taxa <- y$OTU

x <- x_df_nf %>%
  filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups:   OTU [5]
##    OTU           depth    mean     se
##    <chr>         <fct>   <dbl>  <dbl>
##  1 Ascomycota    0...10  0.702 0.0227
##  2 Ascomycota    10...20 0.682 0.0190
##  3 Ascomycota    20...30 0.696 0.0207
##  4 Ascomycota    30...40 0.627 0.0486
##  5 Ascomycota    40...   0.752 0.0431
##  6 Basidiomycota 0...10  0.265 0.0198
##  7 Basidiomycota 10...20 0.283 0.0188
##  8 Basidiomycota 20...30 0.196 0.0248
##  9 Basidiomycota 30...40 0.114 0.0320
## 10 Basidiomycota 40...   0.136 0.0364
## # ℹ 15 more rows
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

#write.csv2(x, file = "5_Phyla_in_soil_layers_WITHOUT_forest_mean.csv")
library(car)
library("rcompanion")
library("multcompView")

taxa <- y$OTU

for (i in taxa) {
  df <- subset(x_df_nf, OTU==i)
  # Using leveneTest()
  print(i)
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)

# perform the Kruskal test
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)

wilcox.res <- pairwise.wilcox.test(df$Abundance, df$depth,
                     p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
print(wilcox.res)
mc = fullPTable(wilcox.res)
mc <- multcompLetters(mc)
mc <- mc[["Letters"]]
print(mc)
}
## [1] "Ascomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value   Pr(>F)    
## group   4  7.2906 2.73e-05 ***
##       120                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 4.2424, df = 4, p-value = 0.3742
## 
##            0...10   10...20   20...30   30...40
## 10...20 0.6802454        NA        NA        NA
## 20...30 0.8626083 0.6802454        NA        NA
## 30...40 0.6802454 0.8626083 0.7375178        NA
## 40...   0.6574517 0.6574517 0.6574517 0.6212054
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "a"     "a" 
## [1] "Basidiomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   4  1.0875 0.3659
##       120               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 38.942, df = 4, p-value = 7.161e-08
## 
##               0...10      10...20     20...30   30...40
## 10...20 6.119242e-01           NA          NA        NA
## 20...30 3.091939e-02 4.118506e-03          NA        NA
## 30...40 2.260766e-06 2.260766e-06 0.002588052        NA
## 40...   1.622194e-03 5.646255e-04 0.025631670 0.6721398
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "b"     "c"     "c" 
## [1] "Mortierellomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   4   5.562 0.0003841 ***
##       120                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 39.225, df = 4, p-value = 6.259e-08
## 
##               0...10      10...20    20...30      30...40
## 10...20 8.866322e-02           NA         NA           NA
## 20...30 3.972787e-05 3.234148e-04         NA           NA
## 30...40 1.302516e-06 1.302516e-06 0.02136213           NA
## 40...   8.777488e-01 7.468220e-01 0.04799830 0.0008671149
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "b"     "c"     "a" 
## [1] "Glomeromycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   4  4.9745 0.0009581 ***
##       120                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 23.482, df = 4, p-value = 0.0001014
## 
##               0...10     10...20    20...30   30...40
## 10...20 9.666451e-03          NA         NA        NA
## 20...30 3.425034e-05 0.009666451         NA        NA
## 30...40 8.343651e-04 0.003236620 0.04959274        NA
## 40...   5.593752e-01 0.510709727 0.33419998 0.1147993
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "b"     "c"     "d"  "abcd" 
## [1] "Chytridiomycota"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value   Pr(>F)   
## group   4  4.0054 0.004377 **
##       120                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 84.286, df = 4, p-value < 2.2e-16
## 
##               0...10      10...20      20...30   30...40
## 10...20 5.507318e-01           NA           NA        NA
## 20...30 3.796944e-01 2.030024e-01           NA        NA
## 30...40 1.071611e-08 1.071611e-08 1.991630e-07        NA
## 40...   6.881199e-10 6.881199e-10 7.192628e-09 0.1413528
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "b"     "b"

1.3.1.2. AMF below 30 cm

taxa <- "Glomeromycota"

# Construct a data.frame with the selected taxonomic group
df <- filter(x_df_nf, OTU == taxa & depth_numerical > 40)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  1.1016   0.35
##       22
x <- df %>%
  group_by(sample_type) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 3 × 3
##   sample_type    mean     se
##   <fct>         <dbl>  <dbl>
## 1 meadow       0.0779 0.0511
## 2 organic      0.0404 0.0179
## 3 conventional 0.0165 0.0139
# one-way ANOVA
res.aov <- aov(Abundance ~ sample_type, data = df)
# Summary of the analysis
print(summary(res.aov))
##             Df Sum Sq  Mean Sq F value Pr(>F)
## sample_type  2 0.0161 0.008049   0.992  0.387
## Residuals   22 0.1785 0.008113

1.3.2 Forest only

1.3.2.1. Five most abundant phyla

x_df_nf <- subset(x_df, sample_type=="forest")

x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"

x <- x_df_nf %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU                  mean      se
##   <chr> <chr>               <dbl>   <dbl>
## 1 2019  Basidiomycota     0.533   0.0555 
## 2 2019  Ascomycota        0.402   0.0486 
## 3 2019  Mortierellomycota 0.0470  0.0158 
## 4 2019  Rozellomycota     0.0131  0.00974
## 5 2019  Glomeromycota     0.00368 0.00177
taxa <- y$OTU

x <- x_df_nf %>%
  filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups:   OTU [5]
##    OTU           depth    mean     se
##    <chr>         <fct>   <dbl>  <dbl>
##  1 Ascomycota    0...10  0.478 0.0839
##  2 Ascomycota    10...20 0.516 0.0422
##  3 Ascomycota    20...30 0.274 0.160 
##  4 Ascomycota    30...40 0.407 0.129 
##  5 Ascomycota    40...   0.336 0.109 
##  6 Basidiomycota 0...10  0.449 0.121 
##  7 Basidiomycota 10...20 0.457 0.0382
##  8 Basidiomycota 20...30 0.597 0.213 
##  9 Basidiomycota 30...40 0.510 0.149 
## 10 Basidiomycota 40...   0.649 0.0941
## # ℹ 15 more rows
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

#write.csv2(x, file = "5_Phyla_in_soil_layers_ONLY_forest_mean.csv")
taxa <- "Glomeromycota"

# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  4  3.1178 0.06588 .
##       10                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
kruskal.test(Abundance ~ depth, data = df)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 7.3861, df = 4, p-value = 0.1168

2. Class

ps_RA_x <- aggregate_rare(ps_RA, level = "class", detection = 0, prevalence = 0)
ps_RA_x              
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 68 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 68 taxa by 1 taxonomic ranks ]
# 68 taxa and 140 samples


#create data table
x_df <-  psmelt(ps_RA_x)

2.1. Five most abundant class

x_df$year <- "2019"

x <- x_df %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU               mean     se
##   <chr> <chr>            <dbl>  <dbl>
## 1 2019  Leotiomycetes   0.282  0.0200
## 2 2019  Sordariomycetes 0.169  0.0124
## 3 2019  Dothideomycetes 0.127  0.0128
## 4 2019  Tremellomycetes 0.126  0.0105
## 5 2019  Agaricomycetes  0.0884 0.0145
taxa <- y$OTU

x <- x_df %>%
  filter(OTU %in% taxa) %>% group_by(OTU, sample_type) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 20 × 4
## # Groups:   OTU [5]
##    OTU             sample_type    mean      se
##    <chr>           <fct>         <dbl>   <dbl>
##  1 Agaricomycetes  forest       0.448  0.0653 
##  2 Agaricomycetes  meadow       0.0583 0.00947
##  3 Agaricomycetes  organic      0.0592 0.0217 
##  4 Agaricomycetes  conventional 0.0215 0.00727
##  5 Dothideomycetes forest       0.0501 0.0130 
##  6 Dothideomycetes meadow       0.189  0.0351 
##  7 Dothideomycetes organic      0.109  0.0178 
##  8 Dothideomycetes conventional 0.115  0.0151 
##  9 Leotiomycetes   forest       0.202  0.0354 
## 10 Leotiomycetes   meadow       0.301  0.0367 
## 11 Leotiomycetes   organic      0.270  0.0398 
## 12 Leotiomycetes   conventional 0.302  0.0375 
## 13 Sordariomycetes forest       0.0309 0.00929
## 14 Sordariomycetes meadow       0.0874 0.0157 
## 15 Sordariomycetes organic      0.239  0.0229 
## 16 Sordariomycetes conventional 0.225  0.0210 
## 17 Tremellomycetes forest       0.0757 0.0285 
## 18 Tremellomycetes meadow       0.139  0.0215 
## 19 Tremellomycetes organic      0.148  0.0209 
## 20 Tremellomycetes conventional 0.110  0.0157
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

#write.csv2(x, file = "5_Class_in_soil_layers_All_Management.csv")

2.2. Test for each class separately

taxa <- y$OTU

for (i in taxa) {
  # Construct a data.frame with the selected taxonomic group
df <- filter(x_df, OTU == i)
print(i)

# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)


# perform the Kruskal test
k <- kruskal.test(Abundance ~ sample_type, data = df)
print(k)

w <- pairwise.wilcox.test(df$Abundance, df$sample_type,
                     p.adjust.method = "BH")
print(w)
}
## [1] "Leotiomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  0.6472  0.586
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 3.8991, df = 3, p-value = 0.2726
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.33   -      -      
## organic      0.61   0.33   -      
## conventional 0.33   0.61   0.33   
## 
## P value adjustment method: BH 
## [1] "Sordariomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   3  8.0545 5.591e-05 ***
##       136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 47.221, df = 3, p-value = 3.12e-10
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow  organic
## meadow       0.0057  -       -      
## organic      4.9e-07 1.9e-06 -      
## conventional 4.9e-07 1.9e-06 0.6393 
## 
## P value adjustment method: BH 
## [1] "Dothideomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   3  5.7442 0.0009917 ***
##       136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 9.5392, df = 3, p-value = 0.02292
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.024  -      -      
## organic      0.134  0.134  -      
## conventional 0.024  0.415  0.560  
## 
## P value adjustment method: BH 
## [1] "Tremellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  1.4793  0.223
##       136               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 5.8864, df = 3, p-value = 0.1173
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest meadow organic
## meadow       0.20   -      -      
## organic      0.20   0.71   -      
## conventional 0.33   0.33   0.23   
## 
## P value adjustment method: BH 
## [1] "Agaricomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   3   13.41 1.018e-07 ***
##       136                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 44.002, df = 3, p-value = 1.508e-09
## 
## 
##  Pairwise comparisons using Wilcoxon rank sum exact test 
## 
## data:  df$Abundance and df$sample_type 
## 
##              forest  meadow  organic
## meadow       4.7e-08 -       -      
## organic      4.7e-08 0.05845 -      
## conventional 4.7e-08 0.00015 0.52326
## 
## P value adjustment method: BH
x <- x_df %>%
  filter(OTU %in% taxa) %>% group_by(OTU, sample_type) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 20 × 4
## # Groups:   OTU [5]
##    OTU             sample_type    mean      se
##    <chr>           <fct>         <dbl>   <dbl>
##  1 Agaricomycetes  forest       0.448  0.0653 
##  2 Agaricomycetes  meadow       0.0583 0.00947
##  3 Agaricomycetes  organic      0.0592 0.0217 
##  4 Agaricomycetes  conventional 0.0215 0.00727
##  5 Dothideomycetes forest       0.0501 0.0130 
##  6 Dothideomycetes meadow       0.189  0.0351 
##  7 Dothideomycetes organic      0.109  0.0178 
##  8 Dothideomycetes conventional 0.115  0.0151 
##  9 Leotiomycetes   forest       0.202  0.0354 
## 10 Leotiomycetes   meadow       0.301  0.0367 
## 11 Leotiomycetes   organic      0.270  0.0398 
## 12 Leotiomycetes   conventional 0.302  0.0375 
## 13 Sordariomycetes forest       0.0309 0.00929
## 14 Sordariomycetes meadow       0.0874 0.0157 
## 15 Sordariomycetes organic      0.239  0.0229 
## 16 Sordariomycetes conventional 0.225  0.0210 
## 17 Tremellomycetes forest       0.0757 0.0285 
## 18 Tremellomycetes meadow       0.139  0.0215 
## 19 Tremellomycetes organic      0.148  0.0209 
## 20 Tremellomycetes conventional 0.110  0.0157

2.3. Test classes in soil layers

2.3.1 meadow, organic and conevntional only

2.3.1.1. Five most abundant classes

x_df_nf <- subset(x_df, sample_type!="forest")

x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"

x <- x_df_nf %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU                  mean      se
##   <chr> <chr>               <dbl>   <dbl>
## 1 2019  Leotiomycetes      0.291  0.0218 
## 2 2019  Sordariomycetes    0.185  0.0131 
## 3 2019  Dothideomycetes    0.137  0.0140 
## 4 2019  Tremellomycetes    0.132  0.0111 
## 5 2019  Mortierellomycetes 0.0793 0.00956
taxa <- y$OTU

x <- x_df_nf %>%
  filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups:   OTU [5]
##    OTU             depth     mean     se
##    <chr>           <fct>    <dbl>  <dbl>
##  1 Dothideomycetes 0...10  0.296  0.0438
##  2 Dothideomycetes 10...20 0.193  0.0160
##  3 Dothideomycetes 20...30 0.0870 0.0164
##  4 Dothideomycetes 30...40 0.0515 0.0207
##  5 Dothideomycetes 40...   0.0548 0.0172
##  6 Leotiomycetes   0...10  0.115  0.0183
##  7 Leotiomycetes   10...20 0.150  0.0126
##  8 Leotiomycetes   20...30 0.310  0.0394
##  9 Leotiomycetes   30...40 0.422  0.0482
## 10 Leotiomycetes   40...   0.460  0.0626
## # ℹ 15 more rows
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

#write.csv2(x, file = "5_Classes_in_soil_layers_WITHOUT_forest_mean.csv")
taxa <- y$OTU

for (i in taxa) {
  # Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==i)
print(i)

# Using leveneTest()
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)

# perform the Kruskal test
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)

wilcox.res <- pairwise.wilcox.test(df$Abundance, df$depth,
                     p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
print(wilcox.res)

mc = fullPTable(wilcox.res)
mc <- multcompLetters(mc)
mc <- mc[["Letters"]]
print(mc)
}
## [1] "Leotiomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   4  14.875 6.712e-10 ***
##       120                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 43.951, df = 4, p-value = 6.568e-09
## 
##               0...10      10...20    20...30   30...40
## 10...20 2.170595e-02           NA         NA        NA
## 20...30 1.819319e-05 3.105480e-03         NA        NA
## 30...40 1.819319e-05 2.393976e-05 0.08486076        NA
## 40...   1.819319e-05 4.506015e-05 0.11043685 0.8626083
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "b"     "c"     "c"     "c" 
## [1] "Sordariomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   4  2.6173 0.03845 *
##       120                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 25.086, df = 4, p-value = 4.835e-05
## 
##               0...10      10...20      20...30   30...40
## 10...20 3.753357e-01           NA           NA        NA
## 20...30 3.753357e-01 2.425450e-01           NA        NA
## 30...40 8.223114e-05 8.223114e-05 0.0002982584        NA
## 40...   2.183687e-01 1.084402e-01 0.3753357254 0.1072279
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "b"    "ab" 
## [1] "Dothideomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   4  5.7029 0.0003088 ***
##       120                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 64.123, df = 4, p-value = 3.937e-13
## 
##               0...10      10...20      20...30   30...40
## 10...20 3.510510e-01           NA           NA        NA
## 20...30 9.043065e-07 2.433622e-06           NA        NA
## 30...40 4.846508e-08 5.869563e-08 0.0005249541        NA
## 40...   1.060943e-07 3.675922e-07 0.0151251757 0.5900142
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "b"     "c"     "c" 
## [1] "Tremellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value   Pr(>F)   
## group   4  4.5628 0.001825 **
##       120                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 72.482, df = 4, p-value = 6.787e-15
## 
##               0...10      10...20      20...30    30...40
## 10...20 3.448406e-01           NA           NA         NA
## 20...30 3.257171e-02 1.715879e-03           NA         NA
## 30...40 1.503038e-12 3.164291e-13 6.082530e-06         NA
## 40...   1.211627e-08 4.250065e-09 2.741038e-05 0.04327362
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "b"     "c"     "d" 
## [1] "Mortierellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   4   5.562 0.0003841 ***
##       120                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 39.225, df = 4, p-value = 6.259e-08
## 
##               0...10      10...20    20...30      30...40
## 10...20 8.866322e-02           NA         NA           NA
## 20...30 3.972787e-05 3.234148e-04         NA           NA
## 30...40 1.302516e-06 1.302516e-06 0.02136213           NA
## 40...   8.777488e-01 7.468220e-01 0.04799830 0.0008671149
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "b"     "c"     "a"

2.3.2 Forest only

2.3.2.1. Five most abundant classes

x_df_nf <- subset(x_df, sample_type=="forest")

x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"

x <- x_df_nf %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU               mean     se
##   <chr> <chr>            <dbl>  <dbl>
## 1 2019  Agaricomycetes  0.448  0.0653
## 2 2019  Leotiomycetes   0.202  0.0354
## 3 2019  Tremellomycetes 0.0757 0.0285
## 4 2019  Pezizomycetes   0.0520 0.0233
## 5 2019  Dothideomycetes 0.0501 0.0130
taxa <- y$OTU

x <- x_df_nf %>%
  filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups:   OTU [5]
##    OTU             depth      mean      se
##    <chr>           <fct>     <dbl>   <dbl>
##  1 Agaricomycetes  0...10  0.217   0.0933 
##  2 Agaricomycetes  10...20 0.395   0.0287 
##  3 Agaricomycetes  20...30 0.539   0.260  
##  4 Agaricomycetes  30...40 0.507   0.151  
##  5 Agaricomycetes  40...   0.578   0.0838 
##  6 Dothideomycetes 0...10  0.0996  0.0324 
##  7 Dothideomycetes 10...20 0.0913  0.00381
##  8 Dothideomycetes 20...30 0.0418  0.0285 
##  9 Dothideomycetes 30...40 0.0121  0.0120 
## 10 Dothideomycetes 40...   0.00578 0.00573
## # ℹ 15 more rows
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

#write.csv2(x, file = "5_Classes_in_soil_layers_ONLY_forest_mean.csv")
taxa <- y$OTU

for (i in taxa) {
  # Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==i)
print(i)

# Using leveneTest()
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)

# perform the Kruskal test
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)

wilcox.res <- pairwise.wilcox.test(df$Abundance, df$depth,
                     p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
print(wilcox.res)

mc = fullPTable(wilcox.res)
mc <- multcompLetters(mc)
mc <- mc[["Letters"]]
print(mc)
}
## [1] "Agaricomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  4  0.8107 0.5461
##       10               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 4.7, df = 4, p-value = 0.3195
## 
##            0...10 10...20 20...30 30...40
## 10...20 1.0000000      NA      NA      NA
## 20...30 1.0000000     1.0      NA      NA
## 30...40 0.6666667     1.0       1      NA
## 40...   0.5000000     0.5       1       1
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "a"     "a" 
## [1] "Leotiomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  4  0.2981 0.8726
##       10               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 2.0667, df = 4, p-value = 0.7235
## 
##            0...10   10...20   20...30   30...40
## 10...20 0.7777778        NA        NA        NA
## 20...30 0.7777778 0.7777778        NA        NA
## 30...40 0.7777778 0.7777778 0.7777778        NA
## 40...   0.7777778 0.7777778 1.0000000 0.7777778
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "a"     "a" 
## [1] "Tremellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  4  1.3902 0.3055
##       10               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 7.1667, df = 4, p-value = 0.1273
## 
##            0...10   10...20   20...30 30...40
## 10...20 0.4000000        NA        NA      NA
## 20...30 0.4000000 0.7777778        NA      NA
## 30...40 0.4000000 0.4000000 0.5714286      NA
## 40...   0.5714286 0.7777778 1.0000000     0.4
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "a"     "a" 
## [1] "Pezizomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  4   1.392 0.3049
##       10               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 8.1, df = 4, p-value = 0.08798
## 
##            0...10   10...20 20...30   30...40
## 10...20 0.4000000        NA      NA        NA
## 20...30 0.2500000 0.5714286      NA        NA
## 30...40 0.2500000 0.2500000    0.25        NA
## 40...   0.7777778 0.7777778    1.00 0.5714286
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "a"     "a" 
## [1] "Dothideomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  4  0.5163 0.7259
##       10               
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by depth
## Kruskal-Wallis chi-squared = 8.5667, df = 4, p-value = 0.07289
## 
##            0...10   10...20 20...30 30...40
## 10...20 0.7777778        NA      NA      NA
## 20...30 0.4000000 0.5000000      NA      NA
## 30...40 0.4000000 0.3333333     0.5      NA
## 40...   0.3333333 0.3333333     0.5       1
##  0...10 10...20 20...30 30...40   40... 
##     "a"     "a"     "a"     "a"     "a"

2.3.3 Test Classes that popped up in certain layers

ps_RA_x <- aggregate_rare(ps_RA, level = "class", detection = 0, prevalence = 0)
ps_RA_x              
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 68 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 68 taxa by 1 taxonomic ranks ]
# 68 taxa and 140 samples


#create data table
x_df <-  psmelt(ps_RA_x)

This sentence based on looking at the class composition barplot:

“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Archaeosporomycetes in organic 30-80 cm, Geoglossomycetes in forest 10-20 cm, meadow 10-40 cm and organic 20-80 cm, Microbotryomycetes in conventional 40-80 cm, and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”

Let’s test them separately

2.3.3.1. Pezizomycetes

x_df_nf <- subset(x_df, sample_type=="forest")

x_df_nf$compare <- NA

x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Pezizomycetes"

# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  2.6256 0.1291
##       13
# first one-way ANOVA
res.aov <- aov(Abundance ~ compare, data = df)
# Summary of the analysis
print(summary(res.aov))
##             Df  Sum Sq Mean Sq F value  Pr(>F)   
## compare      1 0.05219 0.05219   10.96 0.00564 **
## Residuals   13 0.06193 0.00476                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

This sentence remains to be tested:

“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Archaeosporomycetes in organic 30-80 cm, Geoglossomycetes in forest 10-20 cm, meadow 10-40 cm and organic 20-80 cm, Microbotryomycetes in conventional 40-80 cm, and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”

2.3.3.2. Archaeosporomycetes

x_df_nf <- subset(x_df, sample_type=="organic")

x_df_nf$compare <- NA

x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Archaeosporomycetes"

# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  1  5.1753 0.02864 *
##       38                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by compare
## Kruskal-Wallis chi-squared = 0, df = 1, p-value = 1

Not significant!

This sentence remains to be tested:

“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes in meadow 10-40 cm, Microbotryomycetes in conventional 40-80 cm, and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”

2.3.3.3. Geoglossomycetes

not sig. in:

  • forest 10-20 cm
  • organic 20-80 cm
x_df_nf <- subset(x_df, sample_type=="meadow")

x_df_nf$compare <- NA

x_df_nf$compare[x_df_nf$depth=="20...30"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="10...20"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Geoglossomycetes"

# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value    Pr(>F)    
## group  1  21.441 4.181e-05 ***
##       38                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by compare
## Kruskal-Wallis chi-squared = 14.099, df = 1, p-value = 0.0001734

Is sig. in meadow 10-40 cm

“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”

2.3.3.4. Microbotryomycetes

x_df_nf <- subset(x_df, sample_type=="conventional")

x_df_nf$compare <- NA

x_df_nf$compare[x_df_nf$depth=="40..."]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Microbotryomycetes"

# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value   Pr(>F)   
## group  1  7.2692 0.009974 **
##       43                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by compare
## Kruskal-Wallis chi-squared = 4.6876, df = 1, p-value = 0.03038

Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.

2.3.3.5. Glomeromycetes

x_df_nf <- subset(x_df, sample_type=="meadow")

x_df_nf$compare <- NA

x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="40..."]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Glomeromycetes"

# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value   Pr(>F)   
## group  1  7.4061 0.009755 **
##       38                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by compare
## Kruskal-Wallis chi-squared = 0.93404, df = 1, p-value = 0.3338

Not sig.

Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Orbiliomycetes in meadow 10-30 cm.

2.3.3.6. Orbiliomycetes.

x_df_nf <- subset(x_df, sample_type=="meadow")

x_df_nf$compare <- NA

x_df_nf$compare[x_df_nf$depth=="10...20"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="20...30"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Orbiliomycetes"

# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)

#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value    Pr(>F)    
## group  1  18.614 0.0001099 ***
##       38                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by compare
## Kruskal-Wallis chi-squared = 19.128, df = 1, p-value = 1.222e-05

This sentence remains valid:

“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Orbiliomycetes in meadow 10-30 cm (Kruskal; p = 0.000).”

EXTRA

library(car)
library("rcompanion")
library("multcompView")

taxa <- "Tremellomycetes"

df <- subset(x_df, OTU==taxa)
df <- subset(df, depth=="0...10" | depth=="10...20")

# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  1.0867  0.363
##       52
# perform the Kruskal test
k <- kruskal.test(Abundance ~ sample_type, data = df)
print(k)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Abundance by sample_type
## Kruskal-Wallis chi-squared = 10.71, df = 3, p-value = 0.0134
wilcox.res <- pairwise.wilcox.test(df$Abundance, df$sample_type,
                     p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
print(wilcox.res)
##                  forest    meadow    organic
## meadow       0.10743261        NA         NA
## organic      0.06401029 0.1074326         NA
## conventional 0.10743261 0.9051620 0.04755236
mc = fullPTable(wilcox.res)
mc <- multcompLetters(mc)
mc <- mc[["Letters"]]
print(mc)
##       forest       meadow      organic conventional 
##         "ab"         "ab"          "a"          "b"
x <- df %>% group_by(sample_type) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 4 × 3
##   sample_type   mean     se
##   <fct>        <dbl>  <dbl>
## 1 forest       0.129 0.0614
## 2 meadow       0.214 0.0263
## 3 organic      0.275 0.0214
## 4 conventional 0.196 0.0149

SOC and fungal classes Spearman: I don’t use these

ps_RA_x <- aggregate_rare(ps_RA, level = "class", detection = 0, prevalence = 0)
ps_RA_x              
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 68 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 68 taxa by 1 taxonomic ranks ]
# 68 taxa and 140 samples


#create data table
x_df <-  psmelt(ps_RA_x)


df <- subset(x_df, sample_type!="forest")

df$year <- "2019"

x <- df %>%
  group_by(year, OTU) %>%
  summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))

y <- x %>%
    arrange(desc(mean)) %>%
    slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups:   year [1]
##   year  OTU                  mean      se
##   <chr> <chr>               <dbl>   <dbl>
## 1 2019  Leotiomycetes      0.291  0.0218 
## 2 2019  Sordariomycetes    0.185  0.0131 
## 3 2019  Dothideomycetes    0.137  0.0140 
## 4 2019  Tremellomycetes    0.132  0.0111 
## 5 2019  Mortierellomycetes 0.0793 0.00956
taxa <- y$OTU


for (i in taxa) {
 df_x <- subset(df, OTU == i)
 print(i)
 x <- cor.test(df_x$Abundance, df_x$C_g_per_kg,  method = "spearman", na.rm = TRUE)
 print(x)
}
## [1] "Leotiomycetes"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$C_g_per_kg
## S = 481191, p-value = 1.685e-08
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.4783141 
## 
## [1] "Sordariomycetes"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$C_g_per_kg
## S = 289884, p-value = 0.2245
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.1094195 
## 
## [1] "Dothideomycetes"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$C_g_per_kg
## S = 104195, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6798935 
## 
## [1] "Tremellomycetes"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$C_g_per_kg
## S = 95314, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.7071777 
## 
## [1] "Mortierellomycetes"
## 
##  Spearman's rank correlation rho
## 
## data:  df_x$Abundance and df_x$C_g_per_kg
## S = 410271, p-value = 0.003354
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.2604336

RESULTS STEP 12: AMF PERMANOVA

AMF PERMANOVA will be done at genus level, because the PERMANOVA is used to support the AMF bubble plot in STEP 13, which is done at genus level.

1. Load data

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("pairwiseAdonis")


setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')


load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
ps_GM <- subset_taxa(ps, phylum=="Glomeromycota")
ps_GM
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 263 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 263 taxa by 7 taxonomic ranks ]
# 263 taxa and 140 samples

ps_GM <- aggregate_rare(ps_GM, level = "genus", detection = 0, prevalence = 0)
ps_GM
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 17 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 17 taxa by 1 taxonomic ranks ]
# 17 taxa and 140 samples

# Pick relative abundances (compositional) and sample metadata 
ps_RA <- microbiome::transform(ps_GM, "compositional")
otu <- abundances(ps_RA)
meta <- meta(ps)

2. Check samples WO AMF

PERMANOVA cannot handle NAs, so I have to remove samples that do not have any AMF taxa from the analysis

# check how many AMF with dim (although I already know it is 17)
dim(otu)
## [1]  17 140
# If column sum adds up to zero, it means that that sample doesn't have any AMF. check how many zero values samples I have, and remove sample if all (17) are zero
x <- colSums(otu==0)==17
z <- which(x, arr.ind = FALSE, useNames = TRUE)
print(z)
##  CG9.1_30to40  CG9.1_40to70 CPO5.1_40to70 CPO5.2_40to70 CR14.1_40to80 
##             4             5            20            25            35 
##     M2_40to60     M3_30to40     M3_40to60  NG2A1_40to70  NG2B3_40to70 
##            55            59            60            65            90 
## OG10.2_40to70 OG10.3_30to40 OG10.3_40to70 OR13.1_30to40 OR13.1_40to80 
##           110           114           115           129           130
Samples_toRemove <- c("CG9.1_30to40", "CG9.1_40to70", "CPO5.1_40to70", "CPO5.2_40to70", "CR14.1_40to80", "M2_40to60", "M3_30to40", "M3_40to60", "NG2A1_40to70", "NG2B3_40to70", "OG10.2_40to70", "OG10.3_30to40", "OG10.3_40to70", "OR13.1_30to40", "OR13.1_40to80")

ps_GM_pruned <- subset_samples(ps_RA, !(sampleID %in% Samples_toRemove))
ps_GM_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 17 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 17 taxa by 1 taxonomic ranks ]
# 17 taxa and 125 samples remained in the dataset

# lets make sure all missing taxa are removed
prev0 = apply(X = otu_table(ps_GM_pruned),
              MARGIN = ifelse(taxa_are_rows(ps_GM_pruned), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_GM_pruned = prune_taxa((prev0 > 0), ps_GM_pruned)
ps_GM_pruned
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 17 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 17 taxa by 1 taxonomic ranks ]
# 17 taxa and 125 samples

# Pick relative abundances (compositional) and sample metadata 
ps_RA <- ps_GM_pruned
otu <- abundances(ps_RA)
meta <- meta(ps_RA)

3. Bray distance

# note! the distance matrix is now at genus level!
ps_RA_bray <- phyloseq::distance(ps_RA, method = "bray")

GP.ord <- ordinate(ps_RA, "PCoA", "bray")
p1 = plot_ordination(ps_RA, GP.ord, type="samples", color="sample_type", shape = "depth")
print(p1)

4. PERMANOVA

4.1. Management

# first with just soil type and strata option
a <- adonis2(formula = ps_RA_bray~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
print(a)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks:  strata 
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
##              Df SumOfSqs      R2     F Pr(>F)    
## sample_type   3    4.029 0.11393 5.186  1e-04 ***
## Residual    121   31.336 0.88607                 
## Total       124   35.366 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.2. Depth

# then with just depth and strata option
a <- adonis2(formula = ps_RA_bray~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
print(a)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks:  strata 
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
##           Df SumOfSqs      R2      F Pr(>F)    
## depth      4    2.978 0.08421 2.7584  1e-04 ***
## Residual 120   32.388 0.91579                  
## Total    124   35.366 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The AMF communities at genus level differed more between management types (PERMANOVA; R2 = 0.114; p = 0.000) than between soil layers (PERMANOVA; R2 = 0.084; p = 0.000).

4.3. Both (not used)

# For the full model it matters which "by" option we choose. When by="terms" will assess significance for each term sequentially from first to last, so that the order of terms matter. We will use this, because with sequential analysis we will get R2 values that sum up to 1, and will also get the significance and R2 values for each interaction term separately rather than for the interaction alone

# because sample type had larger R2 I will put it first in the model
# note interaction term was not significant!

final <- adonis2(formula = ps_RA_bray ~ sample_type + depth, data = meta, permutations = 9999, method = "bray", by = "terms")
print(final)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = ps_RA_bray ~ sample_type + depth, data = meta, permutations = 9999, method = "bray", by = "terms")
##              Df SumOfSqs      R2      F Pr(>F)    
## sample_type   3    4.029 0.11393 5.5646  1e-04 ***
## depth         4    3.097 0.08758 3.2082  2e-04 ***
## Residual    117   28.239 0.79849                  
## Total       124   35.366 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5. Pairwise PERMANOVA

5.1. By management (not used)

set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_bray,factors=meta$sample_type)
pair.mod
##                     pairs Df SumsOfSqs   F.Model         R2 p.value p.adjusted
## 1  conventional vs forest  1 1.1125799  3.748075 0.06973412   0.009      0.054
## 2  conventional vs meadow  1 1.2654046  4.477131 0.05563234   0.003      0.018
## 3 conventional vs organic  1 0.6465733  2.776924 0.03664604   0.022      0.132
## 4        forest vs meadow  1 0.6459536  2.162313 0.04310632   0.075      0.450
## 5       forest vs organic  1 1.6460377  7.515467 0.14310959   0.001      0.006
## 6       meadow vs organic  1 2.5882231 11.141036 0.13563301   0.001      0.006
##   sig
## 1    
## 2   .
## 3    
## 4    
## 5   *
## 6   *
x <- as.data.frame(pair.mod)
print(x)
##                     pairs Df SumsOfSqs   F.Model         R2 p.value p.adjusted
## 1  conventional vs forest  1 1.1125799  3.748075 0.06973412   0.009      0.054
## 2  conventional vs meadow  1 1.2654046  4.477131 0.05563234   0.003      0.018
## 3 conventional vs organic  1 0.6465733  2.776924 0.03664604   0.022      0.132
## 4        forest vs meadow  1 0.6459536  2.162313 0.04310632   0.075      0.450
## 5       forest vs organic  1 1.6460377  7.515467 0.14310959   0.001      0.006
## 6       meadow vs organic  1 2.5882231 11.141036 0.13563301   0.001      0.006
##   sig
## 1    
## 2   .
## 3    
## 4    
## 5   *
## 6   *
setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

#write.csv2(x, file = "AMF_Pairwise_PERMANOVA_by_MANAGEMENT.csv")

5.2. By depth (not used)

set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_bray,factors=meta$depth)
pair.mod
##                 pairs Df  SumsOfSqs   F.Model          R2 p.value p.adjusted
## 1   0...10 vs 10...20  1 0.08290182 0.3780362 0.006952001   0.844       1.00
## 2   0...10 vs 20...30  1 0.34215045 1.3034979 0.023569900   0.236       1.00
## 3   0...10 vs 30...40  1 1.17951804 4.3871337 0.080664918   0.006       0.06
## 4     0...10 vs 40...  1 1.18589735 4.2227763 0.089422448   0.005       0.05
## 5  10...20 vs 20...30  1 0.30058651 1.2249127 0.022180438   0.279       1.00
## 6  10...20 vs 30...40  1 1.30971371 5.2305082 0.094703242   0.003       0.03
## 7    10...20 vs 40...  1 1.55697160 6.0029207 0.122501284   0.002       0.02
## 8  20...30 vs 30...40  1 0.53352935 1.7961256 0.034676834   0.134       1.00
## 9    20...30 vs 40...  1 0.85776912 2.7351625 0.059804369   0.031       0.31
## 10   30...40 vs 40...  1 0.30239257 0.9246894 0.023160842   0.453       1.00
##    sig
## 1     
## 2     
## 3     
## 4    .
## 5     
## 6    .
## 7    .
## 8     
## 9     
## 10

6. PERMANOVA: Management effect at different depths

meta$depth <- as.factor(meta$depth)

for (i in levels(meta$depth)) {
  # subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")
ps_RA_subset <- subset_samples(ps_RA_subset, depth == i)

# lets make sure all missing taxa are removed
prev0 = apply(X = otu_table(ps_RA_subset),
              MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
              FUN = function(x){sum(x > 0)})

# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)

meta_subset <- meta(ps_RA_subset)

ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")

set.seed(777)
x <- as.data.frame(pairwise.adonis(ps_RA_subset_bray, factors=meta_subset$sample_type))
x$depth <- i
print(x)
}
##                     pairs Df SumsOfSqs  F.Model        R2 p.value p.adjusted
## 1  conventional vs meadow  1 0.3750125 1.841343 0.1093347   0.127      0.381
## 2 conventional vs organic  1 0.6560974 3.970385 0.2092939   0.013      0.039
## 3       meadow vs organic  1 0.3202003 1.587460 0.1018421   0.182      0.546
##   sig  depth
## 1     0...10
## 2   . 0...10
## 3     0...10
##                     pairs Df SumsOfSqs  F.Model         R2 p.value p.adjusted
## 1  conventional vs meadow  1 0.3639673 2.205031 0.12816200   0.085      0.255
## 2 conventional vs organic  1 0.1200414 1.070250 0.06659824   0.347      1.000
## 3       meadow vs organic  1 0.6154410 3.748793 0.21121396   0.032      0.096
##   sig   depth
## 1     10...20
## 2     10...20
## 3     10...20
##                     pairs Df SumsOfSqs   F.Model         R2 p.value p.adjusted
## 1  conventional vs meadow  1 0.9973650 4.0361192 0.21202427   0.001      0.003
## 2 conventional vs organic  1 0.1803777 0.7225702 0.04595751   0.647      1.000
## 3       meadow vs organic  1 1.1247420 7.0350643 0.33444463   0.002      0.006
##   sig   depth
## 1   * 20...30
## 2     20...30
## 3   * 20...30
##                     pairs Df  SumsOfSqs   F.Model         R2 p.value p.adjusted
## 1  conventional vs meadow  1 0.57259160 1.8617351 0.11737273   0.137      0.411
## 2 conventional vs organic  1 0.06892252 0.2136963 0.01749645   0.912      1.000
## 3       meadow vs organic  1 0.49742310 1.9092142 0.13726255   0.110      0.330
##   sig   depth
## 1     30...40
## 2     30...40
## 3     30...40
##                     pairs Df SumsOfSqs   F.Model         R2 p.value p.adjusted
## 1  conventional vs meadow  1 0.3635791 0.8991967 0.09083532   0.503      1.000
## 2 conventional vs organic  1 0.2129225 0.6313847 0.07314988   0.620      1.000
## 3       meadow vs organic  1 0.6051674 1.8342283 0.16929939   0.175      0.525
##   sig depth
## 1     40...
## 2     40...
## 3     40...

RESULTS STEP 13: AMF bubble plot

1. Load data and add soil_type_depth

library(vegan)
library(goeveg)
library(metagMisc)
library(phyloseq)
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library(car)

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# add sample_type_depth# add soil_type_depth
sample_data(ps)$soil_type_depth <- paste(sample_data(ps)$sample_type, "_", sample_data(ps)$depth)

meta <- meta(ps)

2. Keep only Glomeromycota

Note! first transform to RA and then filter AMF

# Relative abundance
ps_RA <- microbiome::transform(ps, 'compositional')
ps_RA
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 33 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
# keep only Glomeromycota
ps2_std_G <- subset_taxa(ps_RA, phylum=="Glomeromycota")
ps2_std_G
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 263 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 33 sample variables ]
## tax_table()   Taxonomy Table:    [ 263 taxa by 7 taxonomic ranks ]
# 263 taxa and 140 samples

3. Aggregate samples by soil_type_depth

For the bubble plot, we want to get average values based on soil_type_depth

library("metagMisc")

ps2_std_G <- phyloseq_average(
  ps2_std_G,
  avg_type = "arithmetic",
  group = "soil_type_depth",
  drop_group_zero = FALSE,
  verbose = FALSE,
  progress = NULL
)

ps2_std_G
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 263 taxa and 20 samples ]
## tax_table()   Taxonomy Table:    [ 263 taxa by 7 taxonomic ranks ]
# aggregate

ps2_std_G <- aggregate_rare(ps2_std_G, level = 'genus', detection = 0, prevalence = 0, include.lowest = TRUE)
ps2_std_G
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 17 taxa and 20 samples ]
## tax_table()   Taxonomy Table:    [ 17 taxa by 1 taxonomic ranks ]

4. Extract tax and otu tables

tax_table_G <- as.data.frame(as.matrix(tax_table(ps2_std_G)))
OTU_genus_table_G <- as.data.frame(as.matrix(otu_table(ps2_std_G)))

5. Add total AMF to otu table and make wide

# make OTUs into columns
OTU_genus_table_G <- t(OTU_genus_table_G)
OTU_genus_table_G <- as.data.frame(OTU_genus_table_G)

# add total of phylum glomeracea
OTU_genus_table_G <- cbind(
  OTU_genus_table_G,
  total = rowSums(OTU_genus_table_G
))

# Change your data structure from a wide format to a long format. Put any variables that are not OTUs/genus, into the id parameter

#convert data frame from a wide format to a long format
pc <- tibble::rownames_to_column(OTU_genus_table_G, "Sample")
pcm = melt(pc, id = c("Sample"))

6. Combine otu wide and meta

meta_x <- meta[!duplicated(meta$soil_type_depth),]
# Vector of column names to select
columns_to_keep <- c("depth", "sample_type", "soil_type_depth")

# Using base R to select columns
meta_x <- meta_x[, columns_to_keep]

rownames(meta_x) <- NULL

# add metadata
pcm <- left_join(pcm, meta_x, by=c("Sample"="soil_type_depth"))

# change the column name
colnames(pcm)[1] <- "soil_type_depth"

7. Reorder soil_type_depth

pcm$soil_type_depth <- as.factor(pcm$soil_type_depth)

# Change the order of samples
pcm$soil_type_depth <- factor(pcm$soil_type_depth,levels=c("forest _ 0...10", "forest _ 10...20", "forest _ 20...30", "forest _ 30...40", "forest _ 40...", "meadow _ 0...10", "meadow _ 10...20", "meadow _ 20...30", "meadow _ 30...40", "meadow _ 40...", "organic _ 0...10", "organic _ 10...20", "organic _ 20...30", "organic _ 30...40", "organic _ 40...", "conventional _ 0...10", "conventional _ 10...20", "conventional _ 20...30", "conventional _ 30...40", "conventional _ 40..."))

levels(pcm$soil_type_depth)
##  [1] "forest _ 0...10"        "forest _ 10...20"       "forest _ 20...30"      
##  [4] "forest _ 30...40"       "forest _ 40..."         "meadow _ 0...10"       
##  [7] "meadow _ 10...20"       "meadow _ 20...30"       "meadow _ 30...40"      
## [10] "meadow _ 40..."         "organic _ 0...10"       "organic _ 10...20"     
## [13] "organic _ 20...30"      "organic _ 30...40"      "organic _ 40..."       
## [16] "conventional _ 0...10"  "conventional _ 10...20" "conventional _ 20...30"
## [19] "conventional _ 30...40" "conventional _ 40..."

8. Add other taxonomic levels

# add the other taxonomic level annotations
TAX <- as.data.frame(as.matrix(tax_table(ps)))
row.names(TAX) <- NULL
TAX <- TAX[, 1:6]
TAX <- filter(TAX, phylum == "Glomeromycota")


TAX <- TAX[!duplicated(TAX$genus),]
pcm2 <- left_join(pcm, TAX, by = c("variable" = "genus"))

9. Add the taxonomic rank letter to genus

pcm2$variable[pcm2$variable == "Ambisporaceae_unclassified"] <- "Ambisporaceae (f)"
pcm2$variable[pcm2$variable == "Archaeospora"] <- "Archaeospora (g)"
pcm2$variable[pcm2$variable == "Archaeosporaceae_unclassified"] <- "Archaeosporaceae (f)"
pcm2$variable[pcm2$variable == "Archaeosporales_unclassified"] <- "Archaeosporales (o)"
pcm2$variable[pcm2$variable == "Diversispora"] <- "Diversispora (g)"
pcm2$variable[pcm2$variable == "Claroideoglomus"] <- "Claroideoglomus (g)"
pcm2$variable[pcm2$variable == "Entrophospora"] <- "Entrophospora (g)"
pcm2$variable[pcm2$variable == "Claroideoglomeraceae_unclassified"] <- "Claroideoglomeraceae (f)"
pcm2$variable[pcm2$variable == "Dominikia"] <- "Dominikia (g)"
pcm2$variable[pcm2$variable == "Funneliformis"] <- "Funneliformis (g)"
pcm2$variable[pcm2$variable == "Glomeraceae_unclassified"] <- "Glomeraceae (f)"
pcm2$variable[pcm2$variable == "Glomus"] <- "Glomus (g)"
pcm2$variable[pcm2$variable == "Microdominikia"] <- "Microdominikia (g)"
pcm2$variable[pcm2$variable == "Rhizophagus"] <- "Rhizophagus (g)"
pcm2$variable[pcm2$variable == "Glomeromycota_unclassified"] <- "Glomeromycota (p)"
pcm2$variable[pcm2$variable == "Paraglomus"] <- "Paraglomus (g)"
pcm2$variable[pcm2$variable == "Paraglomerales_unclassified"] <- "Paraglomerales (o)"
pcm2$variable[pcm2$variable == "total"] <- "total"
# add total to family
pcm2$family <- pcm2$family %>% replace_na('total')

10. Change family names

pcm2$family[pcm2$family=="Ambisporaceae"] <- "Ambisporaceae (f)"
pcm2$family[pcm2$family=="Archaeosporaceae"] <- "Archaeosporaceae (f)"
pcm2$family[pcm2$family=="Archaeosporales_unclassified"] <- "Archaeosporales (o)"
pcm2$family[pcm2$family=="Diversisporaceae"] <- "Diversisporaceae (f)"
pcm2$family[pcm2$family=="Entrophosporaceae"] <- "Entrophosporaceae (f)"
pcm2$family[pcm2$family=="Claroideoglomeraceae"] <- "Claroideoglomeraceae (f)"
pcm2$family[pcm2$family=="Glomeraceae"] <- "Glomeraceae (f)"
pcm2$family[pcm2$family=="Glomeromycota_unclassified"] <- "Glomeromycota (p)"
pcm2$family[pcm2$family=="Paraglomeraceae"] <- "Paraglomeraceae (f)"
pcm2$family[pcm2$family=="Paraglomerales_unclassified"] <- "Paraglomerales (o)"
# make into factor
pcm2$family <- as.factor(pcm2$family)

levels(pcm2$family)
##  [1] "Ambisporaceae (f)"        "Archaeosporaceae (f)"    
##  [3] "Archaeosporales (o)"      "Claroideoglomeraceae (f)"
##  [5] "Diversisporaceae (f)"     "Entrophosporaceae (f)"   
##  [7] "Glomeraceae (f)"          "Glomeromycota (p)"       
##  [9] "Paraglomeraceae (f)"      "Paraglomerales (o)"      
## [11] "total"

11. Change Family order

# Change level family

pcm2$family <- factor(pcm2$family, levels = c("Ambisporaceae (f)", "Archaeosporaceae (f)", "Diversisporaceae (f)", "Glomeraceae (f)", "Claroideoglomeraceae (f)", "Entrophosporaceae (f)", "Paraglomeraceae (f)", "Glomeromycota (p)", "Archaeosporales (o)", "Paraglomerales (o)", "total"))

12. Change genus order

# make variable into factor
pcm2$variable <- as.factor(pcm2$variable)
# Change genus level order

pcm2$variable <- factor(pcm2$variable, levels = c("Archaeosporales (o)", "Ambisporaceae (f)", "Archaeosporaceae (f)", "Archaeospora (g)", "Diversispora (g)", "Glomeraceae (f)", "Dominikia (g)", "Funneliformis (g)", "Glomus (g)", "Microdominikia (g)", "Rhizophagus (g)", "Claroideoglomeraceae (f)", "Claroideoglomus (g)", "Entrophospora (g)", "Paraglomerales (o)", "Paraglomus (g)", "Glomeromycota (p)", "total"))

levels(pcm2$variable)
##  [1] "Archaeosporales (o)"      "Ambisporaceae (f)"       
##  [3] "Archaeosporaceae (f)"     "Archaeospora (g)"        
##  [5] "Diversispora (g)"         "Glomeraceae (f)"         
##  [7] "Dominikia (g)"            "Funneliformis (g)"       
##  [9] "Glomus (g)"               "Microdominikia (g)"      
## [11] "Rhizophagus (g)"          "Claroideoglomeraceae (f)"
## [13] "Claroideoglomus (g)"      "Entrophospora (g)"       
## [15] "Paraglomerales (o)"       "Paraglomus (g)"          
## [17] "Glomeromycota (p)"        "total"

13. Assign AMF guild based on AMF family

pcm2$AMF_guild <- NA

pcm2$AMF_guild[pcm2$family=="Ambisporaceae (f)"] <- "ancestral"
pcm2$AMF_guild[pcm2$family=="Archaeosporaceae (f)"] <- "ancestral"
pcm2$AMF_guild[pcm2$family=="Archaeosporales (o)"] <- "unknown"
pcm2$AMF_guild[pcm2$family=="Diversisporaceae (f)"] <- "edaphophilic"
pcm2$AMF_guild[pcm2$family=="Claroideoglomeraceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Entrophosporaceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Glomeraceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Glomeromycota (p)"] <- "unknown"
pcm2$AMF_guild[pcm2$family=="Paraglomeraceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Paraglomerales (o)"] <- "unknown"

14. Add new depth

pcm2$new_depth <- NA
pcm2$new_depth[pcm2$depth=="0...10"]<-"0-10"
pcm2$new_depth[pcm2$depth=="10...20"]<-"10-20"
pcm2$new_depth[pcm2$depth=="20...30"]<-"20-30"
pcm2$new_depth[pcm2$depth=="30...40"]<-"30-40"
pcm2$new_depth[pcm2$depth=="40..."]<-"40-80"

15. Modify tax level: Order

Currently, AMF are divided in five orders (Archaeosporales, Diversisporales, Glomerales, Entrophosporales and Paraglomerales)

# modifying the column of data frame
pcm2$order <- as.factor(pcm2$order)

levels(pcm2$order)
## [1] "Archaeosporales"            "Diversisporales"           
## [3] "Entrophosporales"           "Glomerales"                
## [5] "Glomeromycota_unclassified" "Paraglomerales"
# Change levels
pcm2$order <- factor(pcm2$order, levels = c("Archaeosporales", "Diversisporales", "Entrophosporales", "Glomerales", "Paraglomerales", "Glomeromycota_unclassified"), labels = c("Archaeosporales (o)", "Diversisporales (o)", "Entrophosporales (o)", "Glomerales (o)", "Paraglomerales (o)", "Glomeromycota (p)"))

# add order = Glomeromycota to total
pcm2$order <- pcm2$order %>% replace_na('Glomeromycota (p)')

levels(pcm2$order)
## [1] "Archaeosporales (o)"  "Diversisporales (o)"  "Entrophosporales (o)"
## [4] "Glomerales (o)"       "Paraglomerales (o)"   "Glomeromycota (p)"

16. Plot the Bubble plot

For a bubble plot, you are using geom_point and scaling the size to your value (relative abundance) column.

I checked that: - thesmallest non-zero is 8.314653e-07 or 0.0000008314653 - and the biggest value is 1.164825e-01 or 0.1164825

So lets set the limits in the figure accordingly

# color for AMF_guild
MyPalette = c("red",  "blue", "#ff028d", "black")

# where ("Ancestral" = "red", "Rhizophilic" = "#ff028d", "Edaphophilic" =  "blue", "Unknown" = "black")

xx2 = ggplot(pcm2, aes(x = new_depth, y = variable)) + 
  geom_point(aes(size = value, fill = order, color = AMF_guild), shape = 21, alpha = 1, stroke = 0) + 
  scale_fill_manual(values=c("#99CC99", "#83adb5", "#692D6B", "#D094D2", "#cba69e", "#A9A9A9")) + 
  scale_size_continuous(limits = c(0.0000001, 0.2), range = c(3,30), breaks = c(0.0000001, 0.0001, 0.001, 0.01, 0.2)) + 
  labs( x= "soil layer (cm)", y = "", size = "Relative abundance", fill = "", color = "AMF guild")  + 
  theme(legend.key=element_blank(), 
        axis.text.x = element_text(colour = "black", size = 14, angle = 45, vjust = 1, hjust = 1),
        axis.title.x=element_text(colour = "black", size = 14,face="bold"), 
        axis.text.y = element_text(colour = "black", size = 14), 
        legend.text = element_text(size = 14, colour ="black"), 
        legend.title = element_text(size = 16, face = "bold"), 
        panel.background = element_blank(), panel.border = element_rect(colour = "black", fill = NA, size = 1.2)) + facet_wrap(vars(sample_type), nrow = 1, ncol = 4) +
  scale_y_discrete(limits = rev(levels(pcm2$variable))) + guides(fill = guide_legend(order = 1, ncol = 2, override.aes = list(size = 8, shape = 21)), color = guide_legend(order = 2, ncol = 2, override.aes = list(shape = 15, size = 8, stroke = 2)))
#+ theme(legend.margin=margin(1,1,1,1), legend.box.spacing = unit(1.6, "pt"))
#+ theme(legend.title.align=0.5)#+ theme(legend.position="bottom")
f1 <- xx2 + scale_color_manual(values = (MyPalette)) + theme(strip.text = element_text(size = 16, color = "black"))
#+  guides(color = guide_legend(nrow = 2, override.aes = list(shape = 15, size = 2, stroke = 2)))   #+ coord_flip() + guide_legend(ncol=2)

### change y axis label colors based on AMF guild

f2 <- f1 + theme(axis.text.y = element_text(color = c("black", "black", "#ff028d", "black", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "blue", "red", "red", "red", "black")))

print(f2)

Save with 1400 width and 550 height

17. Statistical testing

I have previously tested the AMF genera, family, order and guild, but no other difference was found other than:

More Ambisporaceae in forest compared to other treatments (Wilcoxon).

So I will not include the tests here, They were done similarly as before e.g. for AMF relative abundance.


RESULTS STEP 14: Spearman correlations with soil properties and fungal and AMF richness as well as with AMF relative abundance

Simple spearman rank correlation with richness and env. variables are done WITHOUT forest because forest soil is so different environment compared to meadow, organic and conventional soils

library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]

1. remove forest

ps_nf <- subset_samples(ps, sample_type!= "forest")

meta <- meta(ps_nf)
names(meta)
##  [1] "sampleID"             "plot"                 "sampling_position"   
##  [4] "depth"                "depth_numerical"      "vegetation"          
##  [7] "sample_type"          "root_mgg"             "pH_H2O"              
## [10] "EC_uScm"              "C_g_per_kg"           "N_gkg"               
## [13] "TP_gkg"               "Alox_mmolkg"          "Feox_mmolkg"         
## [16] "oxides_mmolkg"        "PH2O_mgkg"            "Porg_mgkg"           
## [19] "DOC_mgkg"             "Pinorg_mgkg"          "C_per_N"             
## [22] "observed"             "chao1"                "shannon"             
## [25] "observed_sng"         "chao1_sng"            "shannon_sng"         
## [28] "log_root"             "saprotroph_richness"  "symbiotroph_richness"
## [31] "pathotroph_richness"  "AMF_richness"

2. Spearman for richnesses

Soil properties to test against:

C_per_N pH_H2O C_g_per_kg Feox_mmolkg DOC_mgkg N_gkg depth_numerical Porg_mgkg log_root TP_gkg Pinorg_mgkg Alox_mmolkg PH2O_mgkg

2.1. Observed richness

env <- c("C_per_N", "pH_H2O", "C_g_per_kg", "Feox_mmolkg", "DOC_mgkg", "N_gkg", "depth_numerical", "Porg_mgkg", "log_root", "TP_gkg", "Pinorg_mgkg", "Alox_mmolkg", "PH2O_mgkg")

for (i in env) {
   # Filter out rows with NA values in the columns of interest
  valid_data <- meta[!is.na(meta$observed) & !is.na(meta[[i]]), ]
  
  # Perform Spearman correlation test
  x <- cor.test(valid_data$observed, valid_data[[i]], method = "spearman")
  
  # Print the result
  print(paste("Correlation test for:", i))
  print(x)
  }
## [1] "Correlation test for: C_per_N"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 122420, p-value = 7.695e-15
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6239005 
## 
## [1] "Correlation test for: pH_H2O"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 543813, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.6707008 
## 
## [1] "Correlation test for: C_g_per_kg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 98808, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6964415 
## 
## [1] "Correlation test for: Feox_mmolkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 145648, p-value = 2.38e-11
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5525416 
## 
## [1] "Correlation test for: DOC_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 107044, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6711408 
## 
## [1] "Correlation test for: N_gkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 103654, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6815556 
## 
## [1] "Correlation test for: depth_numerical"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 568043, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.7451402 
## 
## [1] "Correlation test for: Porg_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 108419, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6587925 
## 
## [1] "Correlation test for: log_root"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 147129, p-value = 3.731e-11
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5479921 
## 
## [1] "Correlation test for: TP_gkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 134591, p-value = 6.626e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5865102 
## 
## [1] "Correlation test for: Pinorg_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 361273, p-value = 0.2224
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.1099019 
## 
## [1] "Correlation test for: Alox_mmolkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 130503, p-value = 1.582e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##     rho 
## 0.59907 
## 
## [1] "Correlation test for: PH2O_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$observed and valid_data[[i]]
## S = 258057, p-value = 0.03667
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.1878602

2.2. AMF_richness

env <- c("C_per_N", "pH_H2O", "C_g_per_kg", "Feox_mmolkg", "DOC_mgkg", "N_gkg", "depth_numerical", "Porg_mgkg", "log_root", "TP_gkg", "Pinorg_mgkg", "Alox_mmolkg", "PH2O_mgkg")

for (i in env) {
   # Filter out rows with NA values in the columns of interest
  valid_data <- meta[!is.na(meta$AMF_richness) & !is.na(meta[[i]]), ]
  
  # Perform Spearman correlation test
  x <- cor.test(valid_data$AMF_richness, valid_data[[i]], method = "spearman")
  
  # Print the result
  print(paste("Correlation test for:", i))
  print(x)
  }
## [1] "Correlation test for: C_per_N"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 142617, p-value = 9.284e-12
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5618529 
## 
## [1] "Correlation test for: pH_H2O"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 512461, p-value = 2.496e-12
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.5743796 
## 
## [1] "Correlation test for: C_g_per_kg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 159586, p-value = 1.27e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5097203 
## 
## [1] "Correlation test for: Feox_mmolkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 124010, p-value = 1.425e-14
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6190164 
## 
## [1] "Correlation test for: DOC_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 157639, p-value = 7.53e-10
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5157014 
## 
## [1] "Correlation test for: N_gkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 176552, p-value = 8.089e-08
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4575968 
## 
## [1] "Correlation test for: depth_numerical"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 453384, p-value = 5.841e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.3928852 
## 
## [1] "Correlation test for: Porg_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 204283, p-value = 4.676e-05
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.3570961 
## 
## [1] "Correlation test for: log_root"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 189803, p-value = 1.324e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4168873 
## 
## [1] "Correlation test for: TP_gkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 252601, p-value = 0.01205
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2239608 
## 
## [1] "Correlation test for: Pinorg_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 384409, p-value = 0.0434
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.1809802 
## 
## [1] "Correlation test for: Alox_mmolkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 133398, p-value = 4.39e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5901751 
## 
## [1] "Correlation test for: PH2O_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  valid_data$AMF_richness and valid_data[[i]]
## S = 349401, p-value = 0.271
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## -0.09961067

3. Spearman for AMF RA

ps_nf_RA <- transform(ps_nf, "compositional")
ps_nf_RA
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
gm <- aggregate_rare(ps_nf_RA, level = "phylum", detection = 0, prevalence = 0 )
gm
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 14 taxa and 125 samples ]
## sample_data() Sample Data:       [ 125 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 14 taxa by 1 taxonomic ranks ]
df <- psmelt(gm)
df <- subset(df, OTU == "Glomeromycota")

env <- c("C_per_N", "pH_H2O", "C_g_per_kg", "Feox_mmolkg", "DOC_mgkg", "N_gkg", "depth_numerical", "Porg_mgkg", "log_root", "TP_gkg", "Pinorg_mgkg", "Alox_mmolkg", "PH2O_mgkg")

for (i in env) {
  # Perform Spearman correlation test
  x <- cor.test(df$Abundance, df[[i]], method = "spearman", na.rm = TRUE)
  
  # Print the result
  print(paste("Correlation test for:", i))
  print(x)
  }
## [1] "Correlation test for: C_per_N"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 338926, p-value = 0.6479
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## -0.04124707 
## 
## [1] "Correlation test for: pH_H2O"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 305311, p-value = 0.492
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.06202587 
## 
## [1] "Correlation test for: C_g_per_kg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 369419, p-value = 0.1336
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.1349274 
## 
## [1] "Correlation test for: Feox_mmolkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 320234, p-value = 0.8579
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.01617915 
## 
## [1] "Correlation test for: DOC_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 377055, p-value = 0.0777
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## -0.158386 
## 
## [1] "Correlation test for: N_gkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 384219, p-value = 0.0441
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.1803965 
## 
## [1] "Correlation test for: depth_numerical"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 257422, p-value = 0.01924
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2091478 
## 
## [1] "Correlation test for: Porg_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 394534, p-value = 0.006855
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.2416477 
## 
## [1] "Correlation test for: log_root"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 368781, p-value = 0.1393
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.1329676 
## 
## [1] "Correlation test for: TP_gkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 430932, p-value = 0.0002288
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## -0.323907 
## 
## [1] "Correlation test for: Pinorg_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 348926, p-value = 0.4251
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## -0.07197018 
## 
## [1] "Correlation test for: Alox_mmolkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 313002, p-value = 0.6707
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## 0.03839783 
## 
## [1] "Correlation test for: PH2O_mgkg"
## 
##  Spearman's rank correlation rho
## 
## data:  df$Abundance and df[[i]]
## S = 443246, p-value = 5.642e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.3949508

1. Upload plant diversity data

library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("tibble")

plant.div <- read.csv2("C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile\\Plant_diversity_Yoni_2011_2012.csv", dec = ".")

plant.div$Year <- as.factor(plant.div$Year)

2. Non-crop vegetation cover (%)

2.1. Year 2011

Let’s only include organic and conventional in the non-crop coverage analysis as meadow does not have a crop

## first lets test if variances are equal so we know if use ANOVA or Kruskal test:
library(car)

df <- subset(plant.div, Year==2011)
df <- subset(df, Treatment!="meadow")

# Using leveneTest()
result = leveneTest(non_crop_cover ~ Treatment, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  0.6401 0.4542
##        6
# high p-value means that variances are similar and that we can do ANOVA

# Shapiro-Wilk’s method is widely recommended for normality test and it provides better power than K-S. It is based on the correlation between the data and the corresponding normal scores.

shapiro.test(df$non_crop_cover)
## 
##  Shapiro-Wilk normality test
## 
## data:  df$non_crop_cover
## W = 0.98115, p-value = 0.9684
# high p value means that data is normally distributed

res.aov <- aov(non_crop_cover ~ Treatment, data = df)
summary(res.aov)
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## Treatment    1   3793    3793   14.91 0.00834 **
## Residuals    6   1526     254                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# As the ANOVA test is significant, we can compute Tukey HSD
TukeyHSD(res.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = non_crop_cover ~ Treatment, data = df)
## 
## $Treatment
##                       diff      lwr      upr     p adj
## organic-conventional 43.55 15.95684 71.14316 0.0083435

Non-crop vegetation cover in 2011 is significantly different between conventional and organic

2.2. Year 2012

df <- subset(plant.div, Year==2012)
df <- subset(df, Treatment!="meadow")

# Using leveneTest()
result = leveneTest(non_crop_cover ~ Treatment, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  1    4.76 0.08093 .
##        5                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# high p-value means that variances are similar and that we can do ANOVA

# Shapiro-Wilk’s method is widely recommended for normality test and it provides better power than K-S. It is based on the correlation between the data and the corresponding normal scores.

shapiro.test(df$non_crop_cover)
## 
##  Shapiro-Wilk normality test
## 
## data:  df$non_crop_cover
## W = 0.74543, p-value = 0.01137
# low p value means that data is not normally distributed, but let's still do ANOVA

# analysis of variance takes the same form as the usual models you'd see in R
# response ~ dependent, data = environmental grouping

res.aov <- aov(non_crop_cover ~ Treatment, data = df)
summary(res.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Treatment    1   2885  2885.2    8.19 0.0353 *
## Residuals    5   1761   352.3                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# As the ANOVA test is significant, we can compute Tukey HSD
TukeyHSD(res.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = non_crop_cover ~ Treatment, data = df)
## 
## $Treatment
##                        diff      lwr      upr     p adj
## organic-conventional 41.025 4.175863 77.87414 0.0353283

Non-crop vegetation cover in 2012 is significantly different between conventional and organic

2.3. Plot

pal <- c(meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c")

df <- subset(plant.div, Treatment!="meadow")
df <- droplevels(df)

df$Treatment <- factor(df$Treatment, levels = c("meadow", "organic", "conventional"))

plot_non_crop_cover <- ggplot(df, aes(x = Treatment, y = non_crop_cover, fill = Treatment)) + facet_grid (cols = vars(Year)) +
  geom_boxplot() +
  scale_fill_manual(values = pal) +
  theme(legend.position = "none",
        plot.background = element_rect("white"),
        panel.background = element_rect("white"),
        panel.grid = element_line("grey90"),
        axis.line = element_line("gray25"),
        axis.text.y = element_text(size = 12, color = "gray25"),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title = element_text(color = "gray25"),
        legend.text = element_text(size = 12)) + 
  labs(x = NULL,
       y = "Non-crop vegetation cover")
plot_non_crop_cover

2.4. Add CLD letters

2.4.1. Year 2011

# subset the Year
year11 <- plant.div[plant.div$Year == "2011",]

year11 <- subset(year11, Treatment!="meadow")
year11 <- droplevels(year11)

year11$Treatment <- factor(year11$Treatment, levels = c("meadow", "organic", "conventional"))

# Plot subset
plot_non_crop_cover11 <- ggplot(year11, aes(x = Treatment, y = non_crop_cover, fill = Treatment)) + facet_grid (cols = vars(Year)) +
  geom_boxplot() +
  scale_fill_manual(values = pal) +
  theme(legend.position = "none",
        plot.background = element_rect("white"),
        panel.background = element_rect("white"),
        panel.grid = element_line("grey90"),
        axis.line = element_line("gray25"),
        axis.text.y = element_text(size = 12, color = "gray25"),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title = element_text(color = "gray25"),
        legend.text = element_text(size = 12)) + 
  labs(x = NULL,
       y = "Non-crop vegetation cover")

# Annotate this plot
plot_non_crop_cover11 <- plot_non_crop_cover11 +
  geom_text(x = 1.5,  y = 94, 
           label = "**", 
           colour = "black") +
  geom_segment(x = 0.8, xend = 0.8, 
           y = 92, yend = 93,
           colour = "black") +
  geom_segment(x = 2.2, xend = 2.2, 
           y = 92, yend = 93,
           colour = "black") +
  geom_segment(x = 0.8, xend = 2.2, 
           y = 93, yend = 93,
           colour = "black") + ylim(0, 98)

plot_non_crop_cover11

2.4.2. Year 2012

# subset the Year
year12 <- plant.div[plant.div$Year == "2012",]

year12 <- subset(year12, Treatment!="meadow")
year12 <- droplevels(year12)

# Plot subset
plot_non_crop_cover12 <- ggplot(year12, aes(x = Treatment, y = non_crop_cover, fill = Treatment)) + facet_grid (cols = vars(Year)) +
  geom_boxplot() +
  scale_fill_manual(values = pal) +
  theme(legend.position = "none",
        plot.background = element_rect("white"),
        panel.background = element_rect("white"),
        panel.grid = element_line("grey90"),
        axis.line = element_line("gray25"),
        axis.text.y = element_text(size = 12, color = "gray25"),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title = element_text(color = "gray25"),
        legend.text = element_text(size = 12)) + 
  labs(x = NULL,
       y = "Non-crop vegetation cover")


# Annotate this plot
plot_non_crop_cover12 <- plot_non_crop_cover12 +
  geom_text(x = 1.5,  y = 94, 
           label = "*", 
           colour = "black") +
  geom_segment(x = 0.8, xend = 0.8, 
           y = 92, yend = 93,
           colour = "black") +
  geom_segment(x = 2.2, xend = 2.2, 
           y = 92, yend = 93,
           colour = "black") +
  geom_segment(x = 0.8, xend = 2.2, 
           y = 93, yend = 93,
           colour = "black") + ylim(0, 98)

plot_non_crop_cover12

library(ggpubr)

plot_non_crop_cover <- ggarrange(plot_non_crop_cover11, plot_non_crop_cover12 + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"),
                     ncol = 2, nrow = 1, widths = c(1,0.8))

plot_non_crop_cover

3. Richness

3.1. year 2011

df <- subset(plant.div, Year==2011)

# Using leveneTest()
result = leveneTest(richness ~ Treatment, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  2  4.6742 0.05137 .
##        7                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# high p-value means that variances are similar and that we can do ANOVA

# Shapiro-Wilk’s method is widely recommended for normality test and it provides better power than K-S. It is based on the correlation between the data and the corresponding normal scores.

shapiro.test(df$richness)
## 
##  Shapiro-Wilk normality test
## 
## data:  df$richness
## W = 0.88504, p-value = 0.149
# high p value means that data is normally distributed

# analysis of variance takes the same form as the usual models you'd see in R
# response ~ dependent, data = environmental grouping

res.aov <- aov(richness ~ Treatment, data = df)
summary(res.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)
## Treatment    2    107   53.50   2.003  0.205
## Residuals    7    187   26.71
# As the ANOVA test is significant, we can compute Tukey HSD
TukeyHSD(res.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = richness ~ Treatment, data = df)
## 
## $Treatment
##                      diff        lwr      upr     p adj
## meadow-conventional  -0.5 -13.682456 12.68246 0.9931512
## organic-conventional  6.5  -4.263430 17.26343 0.2444743
## organic-meadow        7.0  -6.182456 20.18246 0.3210863

Plant richness in 2011 does not significantly differ between any treatments

3.2. Year 2012

df <- subset(plant.div, Year==2012)

# Using leveneTest()
result = leveneTest(richness ~ Treatment, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  2.2103 0.1909
##        6
# high p-value means that variances are similar and that we can do ANOVA

# Shapiro-Wilk’s method is widely recommended for normality test and it provides better power than K-S. It is based on the correlation between the data and the corresponding normal scores.

shapiro.test(df$richness)
## 
##  Shapiro-Wilk normality test
## 
## data:  df$richness
## W = 0.88578, p-value = 0.1805
# high p value means that data is normally distributed

# analysis of variance takes the same form as the usual models you'd see in R
# response ~ dependent, data = environmental grouping

res.aov <- aov(richness ~ Treatment, data = df)
summary(res.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Treatment    2 192.97   96.49   7.067 0.0265 *
## Residuals    6  81.92   13.65                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# As the ANOVA test is significant, we can compute Tukey HSD
TukeyHSD(res.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = richness ~ Treatment, data = df)
## 
## $Treatment
##                           diff       lwr      upr     p adj
## meadow-conventional   3.750000 -6.068278 13.56828 0.5100073
## organic-conventional 10.583333  1.924426 19.24224 0.0221986
## organic-meadow        6.833333 -3.516041 17.18271 0.1867477

Plant richness in 2012 does significantly differs between conventional and organic only

3.3 Plot

pal <- c(meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c")

plant.div$Treatment <- factor(plant.div$Treatment, levels = c("meadow", "organic", "conventional"))

plot_rich <- ggplot(plant.div, aes(x = Treatment, y = richness, fill = Treatment)) + facet_grid (cols = vars(Year)) +
  geom_boxplot() +
  scale_fill_manual(values = pal) + theme(legend.position = "none",
        plot.background = element_rect("white"),
        panel.background = element_rect("white"),
        panel.grid = element_line("grey90"),
        axis.line = element_line("gray25"),
        axis.text.y = element_text(size = 12, color = "gray25"),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title = element_text(color = "gray25"),
        legend.text = element_text(size = 12)) + 
  labs(x = NULL,
       y = "Plant richness")

plot_rich

3.4. Add CLD letters

3.4.1. Year 2011

# subset the Year
year11 <- plant.div[plant.div$Year == "2011",]

# Plot subset
plot_rich11 <- ggplot(year11, aes(x = Treatment, y = richness, fill = Treatment)) + facet_grid (cols = vars(Year)) +
  geom_boxplot() +
  scale_fill_manual(values = pal) +
  theme(legend.position = "none",
        plot.background = element_rect("white"),
        panel.background = element_rect("white"),
        panel.grid = element_line("grey90"),
        axis.line = element_line("gray25"),
        axis.text.y = element_text(size = 12, color = "gray25"),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title = element_text(color = "gray25"),
        legend.text = element_text(size = 12)) + 
  labs(x = NULL,
       y = "Plant richness")

# Annotate this plot
plot_rich11 <- plot_rich11 +
  geom_text(x = 1,  y = 34, 
           label = "(ns.)")  + ylim(10, 35)
plot_rich11

3.4.2. Year 2012

# subset the Year
year12 <- plant.div[plant.div$Year == "2012",]

# Plot subset
plot_rich12 <- ggplot(year12, aes(x = Treatment, y = richness, fill = Treatment)) + facet_grid (cols = vars(Year)) +
  geom_boxplot() +
  scale_fill_manual(values = pal) +
  theme(legend.position = "none",
        plot.background = element_rect("white"),
        panel.background = element_rect("white"),
        panel.grid = element_line("grey90"),
        axis.line = element_line("gray25"),
        axis.text.y = element_text(size = 12, color = "gray25"),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title = element_text(color = "gray25"),
        legend.text = element_text(size = 12)) + 
  labs(x = NULL,
       y = "Plant richness")


# Annotate this plot
plot_rich12 <- plot_rich12 +
  geom_text(x = 2.5,  y = 34, 
           label = "*", 
           colour = "black") +
  geom_segment(x = 1.8, xend = 1.8, 
           y = 32, yend = 33,
           colour = "black") +
  geom_segment(x = 3.2, xend = 3.2, 
           y = 32, yend = 33,
           colour = "black") +
  geom_segment(x = 1.8, xend = 3.2, 
           y = 33, yend = 33,
           colour = "black") + ylim(10, 35)

plot_rich12

3.4.3. Combine figures

library(ggpubr)

plot_rich <- ggarrange(plot_rich11, plot_rich12 + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"),
                     ncol = 2, nrow = 1, widths = c(1,0.75))

plot_rich

4. Shannon diversity

4.1. Year 2011

df <- subset(plant.div, Year==2011)

# Using leveneTest()
result = leveneTest(Shannon ~ Treatment, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  0.2186 0.8089
##        7
# high p-value means that variances are similar and that we can do ANOVA

# Shapiro-Wilk’s method is widely recommended for normality test and it provides better power than K-S. It is based on the correlation between the data and the corresponding normal scores.

shapiro.test(df$Shannon)
## 
##  Shapiro-Wilk normality test
## 
## data:  df$Shannon
## W = 0.97096, p-value = 0.8996
# high p value means that data is normally distributed

# analysis of variance takes the same form as the usual models you'd see in R
# response ~ dependent, data = environmental grouping

res.aov <- aov(Shannon ~ Treatment, data = df)
summary(res.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)
## Treatment    2 0.2332 0.11658   1.279  0.336
## Residuals    7 0.6381 0.09115
# As the ANOVA test is significant, we can compute Tukey HSD
TukeyHSD(res.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Shannon ~ Treatment, data = df)
## 
## $Treatment
##                        diff        lwr      upr     p adj
## organic-meadow        0.415 -0.3550215 1.185022 0.3119085
## conventional-meadow   0.315 -0.4550215 1.085022 0.4874226
## conventional-organic -0.100 -0.7287200 0.528720 0.8879312

Shannon in 2011 does not significantly differ between any treatments

4.2. Year 2012

df <- subset(plant.div, Year==2012)

# Using leveneTest()
result = leveneTest(Shannon ~ Treatment, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  0.4157 0.6775
##        6
# high p-value means that variances are similar and that we can do ANOVA

# Shapiro-Wilk’s method is widely recommended for normality test and it provides better power than K-S. It is based on the correlation between the data and the corresponding normal scores.

shapiro.test(df$Shannon)
## 
##  Shapiro-Wilk normality test
## 
## data:  df$Shannon
## W = 0.96756, p-value = 0.8731
# high p value means that data is normally distributed

# analysis of variance takes the same form as the usual models you'd see in R
# response ~ dependent, data = environmental grouping

res.aov <- aov(Shannon ~ Treatment, data = df)
summary(res.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)
## Treatment    2 0.3887  0.1943   2.598  0.154
## Residuals    6 0.4488  0.0748
# As the ANOVA test is significant, we can compute Tukey HSD
TukeyHSD(res.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Shannon ~ Treatment, data = df)
## 
## $Treatment
##                             diff        lwr       upr     p adj
## organic-meadow        0.52833333 -0.2377047 1.2943714 0.1665450
## conventional-meadow   0.47250000 -0.2542275 1.1992275 0.1941950
## conventional-organic -0.05583333 -0.6967468 0.5850801 0.9616295

Shannon in 2012 does not significantly differ between any treatments

4.3. Plot

pal <- c(meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c")

plant.div$Treatment <- factor(plant.div$Treatment, levels = c("meadow", "organic", "conventional"))

plot_Shannon <- ggplot(plant.div, aes(x = Treatment, y = Shannon, fill = Treatment)) + facet_grid (cols = vars(Year)) +
  geom_boxplot() +
  scale_fill_manual(values = pal) +
  theme(legend.position = "none",
        plot.background = element_rect("white"),
        panel.background = element_rect("white"),
        panel.grid = element_line("grey90"),
        axis.line = element_line("gray25"),
        axis.text.y = element_text(size = 12, color = "gray25"),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title = element_text(color = "gray25"),
        legend.text = element_text(size = 12)) + 
  labs(x = NULL,
       y = "Plant Shannon diversity")

plot_Shannon

4.4. Add CLD letters

4.4.1. Year 2011

# subset the Year
year11 <- plant.div[plant.div$Year == "2011",]

# Plot subset
plot_Shannon11 <- ggplot(year11, aes(x = Treatment, y = Shannon, fill = Treatment)) + facet_grid (cols = vars(Year)) +
  geom_boxplot() +
  scale_fill_manual(values = pal) +
  theme(legend.position = "none",
        plot.background = element_rect("white"),
        panel.background = element_rect("white"),
        panel.grid = element_line("grey90"),
        axis.line = element_line("gray25"),
        axis.text.y = element_text(size = 12, color = "gray25"),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title = element_text(color = "gray25"),
        legend.text = element_text(size = 12)) + 
  labs(x = NULL,
       y = "Plant Shannon diversity")

# Annotate this plot
plot_Shannon11 <- plot_Shannon11 +
  geom_text(x = 1,  y = 2.9, 
           label = "(ns.)")  + ylim(1.5, 3)
plot_Shannon11

4.4.2. Year 2012

# subset the Year
year12 <- plant.div[plant.div$Year == "2012",]

# Plot subset
plot_Shannon12 <- ggplot(year12, aes(x = Treatment, y = Shannon, fill = Treatment)) + facet_grid (cols = vars(Year)) +
  geom_boxplot() +
  scale_fill_manual(values = pal) +
  theme(legend.position = "right",
        legend.title = element_blank(),
        plot.background = element_rect("white"),
        panel.background = element_rect("white"),
        panel.grid = element_line("grey90"),
        axis.line = element_line("gray25"),
        axis.text.y = element_text(size = 12, color = "gray25"),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title = element_text(color = "gray25"),
        legend.text = element_text(size = 12)) + 
  labs(x = NULL,
       y = "Plant Shannon diversity")



# Annotate this plot
plot_Shannon12 <- plot_Shannon12 +
  geom_text(x = 1,  y = 2.9, 
           label = "(ns.)")  + ylim(1.5, 3)
plot_Shannon12

4.4.3. Combine figures

library(ggpubr)

plot_Shannon <- ggarrange(plot_Shannon11, plot_Shannon12 + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"),
                     ncol = 2, nrow = 1, widths = c(1,1.63))

plot_Shannon

5. Combine the figures

5.1. With Non-crop vegetation cover

library(ggpubr)

fig <- ggarrange(plot_non_crop_cover, plot_rich, plot_Shannon,
                     ncol = 3, nrow = 1, widths = c(3,4,6))

fig

5.2. Without Non-crop vegetation cover

fig <- ggarrange(plot_rich, plot_Shannon,
                     ncol = 3, nrow = 1, widths = c(2, 3.1))

fig


library('phyloseq')
library("cowplot")
library("ggplot2")
library("vegan")
library("microbiome")

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load("ps_FINAL")#ps
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)

Test soil properties between management types in all soil layers

library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library("dplyr")
library("multcomp")
library(car)
library("rcompanion")
library("multcompView")

meta$depth <- as.factor(meta$depth)
soil.properties <- as.factor(c("C_g_per_kg", "N_gkg", "C_per_N", "DOC_mgkg", "TP_gkg", 
                               "Porg_mgkg", "Pinorg_mgkg", "PH2O_mgkg", "pH_H2O", 
                               "Feox_mmolkg", "Alox_mmolkg", "log_root"))

# Create a data frame with 0 rows and 6 columns
df_test <- data.frame(matrix(ncol = 6))
colnames(df_test) <- c('sample_type', 'property', 'depth', 'LevenesP', 'ANOVAsOrKrusalsP', 'mean_se_cld')

# Loop over depth and soil properties
for (i in levels(meta$depth)) {
  for (j in levels(soil.properties)) {
    df <- filter(meta, depth == i & sample_type != "forest")
    df <- df[, c("sample_type", j)]
    df <- droplevels(df)
    
    # Perform Levene's test
    levene <- leveneTest(df[[j]] ~ sample_type, df)
    
    if (levene[1,3] > 0.05) {
      # Perform ANOVA
      res.aov <- aov(df[[j]] ~ sample_type, data = df)
      res.aov2 <- summary(res.aov)
      res.aov2 <- res.aov2[[1]]
      
      # Create a result data frame for ANOVA
      df.res.aov <- data.frame(sample_type = levels(df$sample_type),
                               LevenesP = sprintf("%.3f", levene[1,3]),
                               ANOVAsOrKrusalsP = sprintf("%.3f", res.aov2[1,5]))
      
      # Tukey's HSD and compact letter display
      tukey <- glht(res.aov, linfct = mcp(sample_type = "Tukey"))
      cld <- cld(tukey)
      cld <- cld[["mcletters"]][["Letters"]]
      cld <- as.data.frame(cld)
      cld$sample_type <- rownames(cld)
      rownames(cld) <- NULL
      
    } else {
      # Perform Kruskal-Wallis test
      res.aov2 <- kruskal.test(df[[j]] ~ sample_type, data = df)
      df.res.aov <- data.frame(sample_type = levels(df$sample_type),
                               LevenesP = sprintf("%.3f", levene[1, 3]),
                               ANOVAsOrKrusalsP = sprintf("%.3f", res.aov2$p.value))
      
      # Wilcoxon test and compact letter display
      wilcox.res <- pairwise.wilcox.test(df[[j]], df$sample_type, p.adjust.method = "BH")
      wilcox.res <- wilcox.res[["p.value"]]
      
      cld <- fullPTable(wilcox.res)
      cld[is.na(cld)] <- 0
      cld <- multcompLetters(cld)
      cld <- cld[["Letters"]]
      cld <- as.data.frame(cld)
      cld$sample_type <- rownames(cld)
      rownames(cld) <- NULL
    }
    
    # Calculate mean and SE grouped by sample_type
    x <- df %>%
      group_by(sample_type) %>%
      summarise(mean = sprintf("%.3f", mean(.data[[j]], na.rm = TRUE)),
                se = sprintf("%.3f", sd(.data[[j]], na.rm = TRUE) / sqrt(n())))
    
    x$property <- j
    x$depth <- i
    
    # Merge results and append to df_test
    output <- merge(x, df.res.aov, by = "sample_type")
    output <- merge(output, cld, by = "sample_type")
    output$mean_se_cld <- paste(output$mean, "±", output$se, output$cld, sep = "")
    output <- output[, -c(2,3,8)]
    
    df_test <- rbind(df_test, output)
  }
}

df_test
##      sample_type    property   depth LevenesP ANOVAsOrKrusalsP      mean_se_cld
## 1           <NA>        <NA>    <NA>     <NA>             <NA>             <NA>
## 2   conventional Alox_mmolkg  0...10    0.078            0.000    81.770±2.977b
## 3         meadow Alox_mmolkg  0...10    0.078            0.000   120.218±9.732a
## 4        organic Alox_mmolkg  0...10    0.078            0.000    76.559±2.766b
## 5   conventional  C_g_per_kg  0...10    0.016            0.000    32.765±0.910c
## 6         meadow  C_g_per_kg  0...10    0.016            0.000    70.317±2.595a
## 7        organic  C_g_per_kg  0...10    0.016            0.000    35.276±0.678b
## 8   conventional     C_per_N  0...10    0.427            0.000    11.588±0.126b
## 9         meadow     C_per_N  0...10    0.427            0.000    12.962±0.174a
## 10       organic     C_per_N  0...10    0.427            0.000    11.592±0.253b
## 11  conventional    DOC_mgkg  0...10    0.367            0.000  325.756±24.568b
## 12        meadow    DOC_mgkg  0...10    0.367            0.000  849.411±40.715a
## 13       organic    DOC_mgkg  0...10    0.367            0.000  340.274±31.792b
## 14  conventional Feox_mmolkg  0...10    0.943            0.002    97.212±3.778b
## 15        meadow Feox_mmolkg  0...10    0.943            0.002   122.636±4.853a
## 16       organic Feox_mmolkg  0...10    0.943            0.002   101.175±5.494b
## 17  conventional    log_root  0...10    0.453            0.000    -0.016±0.123b
## 18        meadow    log_root  0...10    0.453            0.000     0.934±0.190a
## 19       organic    log_root  0...10    0.453            0.000     0.145±0.120b
## 20  conventional       N_gkg  0...10    0.017            0.000     2.828±0.076c
## 21        meadow       N_gkg  0...10    0.017            0.000     5.425±0.191a
## 22       organic       N_gkg  0...10    0.017            0.000     3.047±0.052b
## 23  conventional      pH_H2O  0...10    0.219            0.013    5.841±0.043ab
## 24        meadow      pH_H2O  0...10    0.219            0.013     5.692±0.110a
## 25       organic      pH_H2O  0...10    0.219            0.013     6.058±0.074b
## 26  conventional   PH2O_mgkg  0...10    0.057            0.003     0.388±0.053a
## 27        meadow   PH2O_mgkg  0...10    0.057            0.003     0.453±0.076a
## 28       organic   PH2O_mgkg  0...10    0.057            0.003     0.156±0.027b
## 29  conventional Pinorg_mgkg  0...10    0.885            0.091  566.024±49.491a
## 30        meadow Pinorg_mgkg  0...10    0.885            0.091  406.421±48.748a
## 31       organic Pinorg_mgkg  0...10    0.885            0.091  471.903±50.249a
## 32  conventional   Porg_mgkg  0...10    0.273            0.002  475.803±33.557b
## 33        meadow   Porg_mgkg  0...10    0.273            0.002  639.043±42.358a
## 34       organic   Porg_mgkg  0...10    0.273            0.002  456.491±24.680b
## 35  conventional      TP_gkg  0...10    0.964            0.102     1.042±0.041a
## 36        meadow      TP_gkg  0...10    0.964            0.102     1.045±0.040a
## 37       organic      TP_gkg  0...10    0.964            0.102     0.928±0.043a
## 38  conventional Alox_mmolkg 10...20    0.158            0.000    80.116±3.141b
## 39        meadow Alox_mmolkg 10...20    0.158            0.000   131.117±8.026a
## 40       organic Alox_mmolkg 10...20    0.158            0.000    75.829±3.234b
## 41  conventional  C_g_per_kg 10...20    0.136            0.000    32.043±1.281b
## 42        meadow  C_g_per_kg 10...20    0.136            0.000    46.864±1.829a
## 43       organic  C_g_per_kg 10...20    0.136            0.000    34.764±0.705b
## 44  conventional     C_per_N 10...20    0.198            0.042   11.614±0.129ab
## 45        meadow     C_per_N 10...20    0.198            0.042    12.445±0.393a
## 46       organic     C_per_N 10...20    0.198            0.042    11.437±0.273b
## 47  conventional    DOC_mgkg 10...20    0.013            0.001  284.888±10.603b
## 48        meadow    DOC_mgkg 10...20    0.013            0.001  495.767±37.094a
## 49       organic    DOC_mgkg 10...20    0.013            0.001  327.089±21.578b
## 50  conventional Feox_mmolkg 10...20    0.812            0.000    97.640±4.588b
## 51        meadow Feox_mmolkg 10...20    0.812            0.000   140.049±4.099a
## 52       organic Feox_mmolkg 10...20    0.812            0.000   102.482±5.157b
## 53  conventional    log_root 10...20    0.460            0.002    -0.709±0.191b
## 54        meadow    log_root 10...20    0.460            0.002     0.336±0.129a
## 55       organic    log_root 10...20    0.460            0.002   -0.267±0.202ab
## 56  conventional       N_gkg 10...20    0.046            0.001     2.756±0.095c
## 57        meadow       N_gkg 10...20    0.046            0.001     3.790±0.186a
## 58       organic       N_gkg 10...20    0.046            0.001     3.048±0.078b
## 59  conventional      pH_H2O 10...20    0.259            0.000     5.994±0.047b
## 60        meadow      pH_H2O 10...20    0.259            0.000     5.675±0.082a
## 61       organic      pH_H2O 10...20    0.259            0.000     6.122±0.068b
## 62  conventional   PH2O_mgkg 10...20    0.576            0.059     0.238±0.042a
## 63        meadow   PH2O_mgkg 10...20    0.576            0.059     0.113±0.026a
## 64       organic   PH2O_mgkg 10...20    0.576            0.059     0.151±0.037a
## 65  conventional Pinorg_mgkg 10...20    0.881            0.153  475.825±40.072a
## 66        meadow Pinorg_mgkg 10...20    0.881            0.153  373.108±43.638a
## 67       organic Pinorg_mgkg 10...20    0.881            0.153  488.146±47.331a
## 68  conventional   Porg_mgkg 10...20    0.737            0.237  466.746±30.861a
## 69        meadow   Porg_mgkg 10...20    0.737            0.237  521.230±32.480a
## 70       organic   Porg_mgkg 10...20    0.737            0.237  454.918±17.320a
## 71  conventional      TP_gkg 10...20    0.722            0.593     0.943±0.042a
## 72        meadow      TP_gkg 10...20    0.722            0.593     0.894±0.030a
## 73       organic      TP_gkg 10...20    0.722            0.593     0.943±0.038a
## 74  conventional Alox_mmolkg 20...30    0.093            0.000    62.675±3.216b
## 75        meadow Alox_mmolkg 20...30    0.093            0.000   107.130±9.584a
## 76       organic Alox_mmolkg 20...30    0.093            0.000    65.338±3.654b
## 77  conventional  C_g_per_kg 20...30    0.120            0.003    13.505±1.994b
## 78        meadow  C_g_per_kg 20...30    0.120            0.003    31.576±3.912a
## 79       organic  C_g_per_kg 20...30    0.120            0.003   21.203±4.044ab
## 80  conventional     C_per_N 20...30    0.120            0.089    10.429±0.723a
## 81        meadow     C_per_N 20...30    0.120            0.089    12.562±0.250a
## 82       organic     C_per_N 20...30    0.120            0.089    10.868±0.891a
## 83  conventional    DOC_mgkg 20...30    0.214            0.002  116.479±16.148b
## 84        meadow    DOC_mgkg 20...30    0.214            0.002  303.054±41.855a
## 85       organic    DOC_mgkg 20...30    0.214            0.002 186.690±36.291ab
## 86  conventional Feox_mmolkg 20...30    0.587            0.000    64.312±8.328b
## 87        meadow Feox_mmolkg 20...30    0.587            0.000  140.938±11.602a
## 88       organic Feox_mmolkg 20...30    0.587            0.000   80.066±10.415b
## 89  conventional    log_root 20...30    0.160            0.001    -1.442±0.204b
## 90        meadow    log_root 20...30    0.160            0.001    -0.252±0.078a
## 91       organic    log_root 20...30    0.160            0.001    -0.954±0.230b
## 92  conventional       N_gkg 20...30    0.101            0.012     1.281±0.147b
## 93        meadow       N_gkg 20...30    0.101            0.012     2.492±0.284a
## 94       organic       N_gkg 20...30    0.101            0.012    1.927±0.345ab
## 95  conventional      pH_H2O 20...30    0.460            0.000     6.347±0.071b
## 96        meadow      pH_H2O 20...30    0.460            0.000     5.825±0.082a
## 97       organic      pH_H2O 20...30    0.460            0.000     6.463±0.140b
## 98  conventional   PH2O_mgkg 20...30    0.035            0.082     0.062±0.005a
## 99        meadow   PH2O_mgkg 20...30    0.035            0.082     0.097±0.015a
## 100      organic   PH2O_mgkg 20...30    0.035            0.082     0.086±0.011a
## 101 conventional Pinorg_mgkg 20...30    0.761            0.289  494.796±51.363a
## 102       meadow Pinorg_mgkg 20...30    0.761            0.289  383.399±36.473a
## 103      organic Pinorg_mgkg 20...30    0.761            0.289  458.051±57.651a
## 104 conventional   Porg_mgkg 20...30    0.043            0.087  203.710±26.330b
## 105       meadow   Porg_mgkg 20...30    0.043            0.087 271.122±52.837ab
## 106      organic   Porg_mgkg 20...30    0.043            0.087  317.298±32.217a
## 107 conventional      TP_gkg 20...30    0.890            0.284     0.698±0.053a
## 108       meadow      TP_gkg 20...30    0.890            0.284     0.654±0.045a
## 109      organic      TP_gkg 20...30    0.890            0.284     0.775±0.056a
## 110 conventional Alox_mmolkg 30...40    0.000            0.002    62.788±0.801b
## 111       meadow Alox_mmolkg 30...40    0.000            0.002    77.735±3.571a
## 112      organic Alox_mmolkg 30...40    0.000            0.002    62.620±1.690b
## 113 conventional  C_g_per_kg 30...40    0.039            0.003     4.124±0.188b
## 114       meadow  C_g_per_kg 30...40    0.039            0.003     9.227±1.985a
## 115      organic  C_g_per_kg 30...40    0.039            0.003     4.764±0.328b
## 116 conventional     C_per_N 30...40    0.025            0.007     6.165±0.471b
## 117       meadow     C_per_N 30...40    0.025            0.007    11.202±2.058a
## 118      organic     C_per_N 30...40    0.025            0.007     6.469±0.334b
## 119 conventional    DOC_mgkg 30...40    0.061            0.003    64.818±3.470b
## 120       meadow    DOC_mgkg 30...40    0.061            0.003  117.462±15.678a
## 121      organic    DOC_mgkg 30...40    0.061            0.003    80.141±6.822b
## 122 conventional Feox_mmolkg 30...40    0.025            0.000    27.530±1.893b
## 123       meadow Feox_mmolkg 30...40    0.025            0.000  119.286±17.522a
## 124      organic Feox_mmolkg 30...40    0.025            0.000    31.413±2.423b
## 125 conventional    log_root 30...40    0.499            0.000    -1.844±0.121c
## 126       meadow    log_root 30...40    0.499            0.000    -0.753±0.087a
## 127      organic    log_root 30...40    0.499            0.000    -1.494±0.076b
## 128 conventional       N_gkg 30...40    0.054            0.171     0.682±0.023a
## 129       meadow       N_gkg 30...40    0.054            0.171     0.851±0.102a
## 130      organic       N_gkg 30...40    0.054            0.171     0.739±0.040a
## 131 conventional      pH_H2O 30...40    0.324            0.001     6.814±0.085b
## 132       meadow      pH_H2O 30...40    0.324            0.001     6.280±0.151a
## 133      organic      pH_H2O 30...40    0.324            0.001     6.992±0.110b
## 134 conventional   PH2O_mgkg 30...40    0.885            0.738     0.088±0.012a
## 135       meadow   PH2O_mgkg 30...40    0.885            0.738     0.098±0.024a
## 136      organic   PH2O_mgkg 30...40    0.885            0.738     0.079±0.012a
## 137 conventional Pinorg_mgkg 30...40    0.075            0.339  397.576±21.274a
## 138       meadow Pinorg_mgkg 30...40    0.075            0.339  469.409±49.171a
## 139      organic Pinorg_mgkg 30...40    0.075            0.339  433.861±27.993a
## 140 conventional   Porg_mgkg 30...40    0.087            0.027  167.123±10.396b
## 141       meadow   Porg_mgkg 30...40    0.087            0.027  112.548±19.473a
## 142      organic   Porg_mgkg 30...40    0.087            0.027 161.557±11.063ab
## 143 conventional      TP_gkg 30...40    0.080            0.817     0.565±0.017a
## 144       meadow      TP_gkg 30...40    0.080            0.817     0.570±0.057a
## 145      organic      TP_gkg 30...40    0.080            0.817     0.595±0.024a
## 146 conventional Alox_mmolkg   40...    0.582            0.003    51.008±1.032b
## 147       meadow Alox_mmolkg   40...    0.582            0.003    57.120±1.483a
## 148      organic Alox_mmolkg   40...    0.582            0.003    51.077±1.273b
## 149 conventional  C_g_per_kg   40...    0.353            0.062     2.677±0.113a
## 150       meadow  C_g_per_kg   40...    0.353            0.062     3.264±0.303a
## 151      organic  C_g_per_kg   40...    0.353            0.062     2.679±0.089a
## 152 conventional     C_per_N   40...    0.208            0.013     4.826±0.175b
## 153       meadow     C_per_N   40...    0.208            0.013     5.883±0.387a
## 154      organic     C_per_N   40...    0.208            0.013     4.939±0.130b
## 155 conventional    DOC_mgkg   40...    0.427            0.003    41.959±2.555b
## 156       meadow    DOC_mgkg   40...    0.427            0.003    61.773±4.866a
## 157      organic    DOC_mgkg   40...    0.427            0.003    45.680±4.074b
## 158 conventional Feox_mmolkg   40...    0.000            0.000    25.711±0.788b
## 159       meadow Feox_mmolkg   40...    0.000            0.000   67.088±12.098a
## 160      organic Feox_mmolkg   40...    0.000            0.000    24.727±0.764b
## 161 conventional    log_root   40...    0.393            0.000    -2.004±0.098b
## 162       meadow    log_root   40...    0.393            0.000    -1.236±0.071a
## 163      organic    log_root   40...    0.393            0.000    -1.555±0.126a
## 164 conventional       N_gkg   40...    0.896            0.925     0.564±0.041a
## 165       meadow       N_gkg   40...    0.896            0.925     0.556±0.032a
## 166      organic       N_gkg   40...    0.896            0.925     0.545±0.023a
## 167 conventional      pH_H2O   40...    0.446            0.003     7.244±0.082b
## 168       meadow      pH_H2O   40...    0.446            0.003     6.876±0.123a
## 169      organic      pH_H2O   40...    0.446            0.003     7.372±0.058b
## 170 conventional   PH2O_mgkg   40...    0.837            0.219     0.136±0.028a
## 171       meadow   PH2O_mgkg   40...    0.837            0.219     0.098±0.018a
## 172      organic   PH2O_mgkg   40...    0.837            0.219     0.167±0.032a
## 173 conventional Pinorg_mgkg   40...    0.951            0.625  555.814±19.855a
## 174       meadow Pinorg_mgkg   40...    0.951            0.625  551.282±20.510a
## 175      organic Pinorg_mgkg   40...    0.951            0.625  579.265±23.771a
## 176 conventional   Porg_mgkg   40...    0.845            0.376  128.341±13.832a
## 177       meadow   Porg_mgkg   40...    0.845            0.376   99.637±13.041a
## 178      organic   Porg_mgkg   40...    0.845            0.376  111.474±16.322a
## 179 conventional      TP_gkg   40...    0.140            0.329     0.684±0.022a
## 180       meadow      TP_gkg   40...    0.140            0.329     0.651±0.014a
## 181      organic      TP_gkg   40...    0.140            0.329     0.691±0.020a

library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)

setwd('C:\\Users\\lehakkin\\PhD\\Fungal_ITS_soil_vertical_profile')

load("ps_FINAL")#ps
ps
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20610 taxa and 140 samples ]
## sample_data() Sample Data:       [ 140 samples by 32 sample variables ]
## tax_table()   Taxonomy Table:    [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional =  "#b71c1c")

library("cowplot")

Pinorg_mgkg

Pinorg_mgkg <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(Pinorg_mgkg, na.rm = TRUE)/1000, se = (sd(Pinorg_mgkg, na.rm = TRUE)/sqrt(length((Pinorg_mgkg))))/1000) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=2, position=position_dodge(1.5)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=14),
        legend.text = element_text(size=12),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="P-inorg (g/kg)") +
  labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()

Pinorg_mgkg

PH2O_mgkg

PH2O_mgkg <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(PH2O_mgkg, na.rm = TRUE), se = (sd(PH2O_mgkg, na.rm = TRUE)/sqrt(length((PH2O_mgkg))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=2, position=position_dodge(1.5)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=14),
        legend.text = element_text(size=12),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="P-H2O (mg/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()

PH2O_mgkg

Alox_mmolkg

Alox_mmolkg <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(Alox_mmolkg, na.rm = TRUE), se = (sd(Alox_mmolkg, na.rm = TRUE)/sqrt(length((Alox_mmolkg))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=2, position=position_dodge(1.5)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=14),
        legend.text = element_text(size=12),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Al-ox (mmol/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()

Alox_mmolkg

Feox_mmolkg

Feox_mmolkg <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(Feox_mmolkg, na.rm = TRUE), se = (sd(Feox_mmolkg, na.rm = TRUE)/sqrt(length((Feox_mmolkg))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=2, position=position_dodge(1.5)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=14),
        legend.text = element_text(size=12),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="Fe-ox (mmol/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()

Feox_mmolkg

pH_H2O

pH_H2O <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(pH_H2O, na.rm = TRUE), se = (sd(pH_H2O, na.rm = TRUE)/sqrt(length((pH_H2O))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=2, position=position_dodge(1.5)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=14),
        legend.text = element_text(size=12),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="pH") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()

pH_H2O

C_g_per_kg

C_g_per_kg <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(C_g_per_kg, na.rm = TRUE), se = (sd(C_g_per_kg, na.rm = TRUE)/sqrt(length((C_g_per_kg))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=2, position=position_dodge(1.5)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=14),
        legend.text = element_text(size=12),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="C (g/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()

C_g_per_kg

N_gkg

N_gkg <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(N_gkg, na.rm = TRUE), se = (sd(N_gkg, na.rm = TRUE)/sqrt(length((N_gkg))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=2, position=position_dodge(1.5)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=14),
        legend.text = element_text(size=12),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="N (g/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()

N_gkg

TP_gkg

TP_gkg <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(TP_gkg, na.rm = TRUE), se = (sd(TP_gkg, na.rm = TRUE)/sqrt(length((TP_gkg))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=2, position=position_dodge(1.5)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=14),
        legend.text = element_text(size=12),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="P-tot (g/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()

TP_gkg

Porg_mgkg

Porg_mgkg <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(Porg_mgkg, na.rm = TRUE)/1000, se = (sd(Porg_mgkg, na.rm = TRUE)/sqrt(length((Porg_mgkg))))/1000) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=2, position=position_dodge(1.5)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=14),
        legend.text = element_text(size=12),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="P-org (g/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()

Porg_mgkg

DOC_mgkg

DOC_mgkg <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(DOC_mgkg, na.rm = TRUE)/1000, se = (sd(DOC_mgkg, na.rm = TRUE)/sqrt(length((DOC_mgkg))))/1000) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=2, position=position_dodge(1.5)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=14),
        legend.text = element_text(size=12),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="DOC (g/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()

DOC_mgkg

# C_per_N

C_per_N <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(C_per_N, na.rm = TRUE), se = (sd(C_per_N, na.rm = TRUE)/sqrt(length((C_per_N))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=2, position=position_dodge(1.5)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=14),
        legend.text = element_text(size=12),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="C/N") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()

C_per_N

log_root

log_root <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(log_root, na.rm = TRUE), se = (sd(log_root, na.rm = TRUE)/sqrt(length((log_root))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=2, position=position_dodge(1.5)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=14),
        legend.text = element_text(size=12),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="log10 root biomass (g/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()

log_root

C_per_N

nc <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(C_per_N, na.rm = TRUE), se = (sd(C_per_N, na.rm = TRUE)/sqrt(length((C_per_N))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=2, position=position_dodge(1.5)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=14),
        legend.text = element_text(size=12),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="C/N") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()

nc

TP_gkg

p <- meta %>%
  group_by(sample_type, depth_numerical) %>%
  summarise(mean = mean(TP_gkg, na.rm = TRUE), se = (sd(TP_gkg, na.rm = TRUE)/sqrt(length((TP_gkg))))) %>%
  ungroup() %>%
  ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
  geom_line(linetype = "dashed") +
  geom_point(size=2, position=position_dodge(1.5)) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.5)) + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
  theme(axis.text = element_text(size=10),
        axis.title = element_text(size=14),
        legend.text = element_text(size=12),
        legend.title = element_text(size=16),
        title = element_text(size=18)) +
  scale_y_continuous(name="P (g/kg)") + labs(x = NULL) + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette) + scale_x_reverse(breaks = c(60, 35, 25, 15, 5), labels = c("40-80 cm", "30-40 cm", "20-30 cm", "10-20 cm", "0-10 cm")) + coord_flip()
p

combine figures

library("ggpubr")

figure <- ggarrange(C_g_per_kg, N_gkg + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), C_per_N + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), DOC_mgkg + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), TP_gkg, Porg_mgkg + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), Pinorg_mgkg + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), PH2O_mgkg + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), pH_H2O, Feox_mmolkg + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), Alox_mmolkg + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), log_root + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"),
                    labels = NULL,
                    ncol = 4, nrow = 3, common.legend = TRUE, legend="right")
figure